0230587040
Write api-<version>.md with version metadata in each file and index, sanitize output for safe paths/markdown, atomically replace exports, and add --keep-old-versions to retain prior exports as an archive.
563 lines
17 KiB
Python
Executable File
563 lines
17 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Export Markdown API docs for packages listed in requirements.txt.
|
|
|
|
Creates docs/<import-module>/api-<version>.md under the current working directory using a
|
|
temporary virtualenv in /tmp (does not install into the system Python).
|
|
|
|
Supported requirements lines: PEP 508 name specs and ``-r`` includes (must stay
|
|
under the directory of the top-level requirements file). Lines like ``-e``,
|
|
direct URLs, and other pip options are installed by pip but not exported.
|
|
|
|
Example:
|
|
./scripts/export-api-docs.py requirements.txt
|
|
./scripts/export-api-docs.py /path/to/requirements.txt --recreate-venv
|
|
./scripts/export-api-docs.py requirements.txt --strict
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import hashlib
|
|
import importlib.metadata as im
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Pin for reproducible exports (pydoc-markdown 4.x API).
# This requirement string is handed to ``pip install`` inside the virtualenv.
PYDOC_MARKDOWN_SPEC = "pydoc-markdown>=4.0,<5"

# PyPI distribution name (normalized) -> import roots when metadata is missing.
# Consulted only when a distribution offers no usable ``top_level.txt``.
PACKAGE_MODULE_FALLBACKS: dict[str, list[str]] = {
    "python-gitlab": ["gitlab"],
    "python-redmine": ["redminelib"],
    "pyyaml": ["yaml"],
    "pillow": ["PIL"],
}

# A single ASCII identifier with no dots; module names must match this before
# they are used as an output directory name or passed to pydoc-markdown.
MODULE_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Stamp file kept inside the virtualenv; it stores the digest of the
# requirements tree that was last installed there.
REQUIREMENTS_HASH_FILE = ".requirements.sha256"
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Parse ``sys.argv`` and return the resulting namespace.

    The namespace carries ``requirements`` (a ``Path``), ``venv_dir`` (a
    ``Path`` or ``None``) and the booleans ``recreate_venv``, ``strict`` and
    ``keep_old_versions``.
    """
    cli = argparse.ArgumentParser(
        description="Export Markdown API docs for requirements.txt packages.",
    )
    cli.add_argument("requirements", type=Path, help="Path to requirements.txt")
    cli.add_argument(
        "--venv-dir",
        type=Path,
        default=None,
        help="Virtualenv location (default: /tmp/export-api-docs-<uid>-<hash>)",
    )
    # Everything else is a plain on/off switch, registered in display order.
    switches = (
        ("--recreate-venv", "Remove and recreate the temporary virtualenv"),
        ("--strict", "Exit with failure if any package or module export fails"),
        (
            "--keep-old-versions",
            "Keep previous api-<version>.md and legacy api.md files in each module directory",
        ),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)
    return cli.parse_args()
|
|
|
|
|
|
def normalize_dist_name(name: str) -> str:
    """Return *name* lower-cased, with each run of ``-``, ``_`` or ``.`` turned into one ``-``."""
    pieces = re.split(r"[-_.]+", name)
    return "-".join(pieces).lower()
|
|
|
|
|
|
def parse_requirements_file(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[str]:
    """Return normalized PyPI distribution names from a requirements file.

    Include lines are followed recursively in every spelling pip accepts for
    the option (``-r file``, ``-rfile``, ``--requirement file`` and
    ``--requirement=file``). An include is skipped with a warning when it
    resolves outside *anchor_dir* or does not exist. Every other line that
    starts with ``-`` is ignored, as is anything after ``#``.

    Args:
        path: Requirements file to read.
        seen: Resolved paths already visited; updated in place so include
            cycles terminate. A fresh set is used when omitted.
        anchor_dir: Directory that includes must stay under; defaults to the
            directory containing *path*.

    Returns:
        Names in file order. Duplicates are not removed.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    if anchor_dir is None:
        anchor_dir = path.parent.resolve()
    if path in seen:
        return []
    seen.add(path)

    names: list[str] = []
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.split("#", 1)[0].strip()
        if not line:
            continue

        # The target may be glued to the short option or joined to the long
        # option with "="; a bare "-r" without a target falls through and is
        # ignored by the generic option check below.
        include_match = re.match(r"(?:--requirement(?:\s+|=)|-r\s*)(\S.*)$", line)
        if include_match:
            include = (path.parent / include_match.group(1).strip()).resolve()
            try:
                include.relative_to(anchor_dir)
            except ValueError:
                print(
                    f"warning: skipping -r outside requirements tree: {include}",
                    file=sys.stderr,
                )
                continue
            if not include.is_file():
                print(f"warning: requirements include not found: {include}", file=sys.stderr)
                continue
            names.extend(parse_requirements_file(include, seen, anchor_dir))
            continue
        if line.startswith("-"):
            continue

        # Drop environment markers / direct-URL parts, then extras, and keep
        # only the leading distribution name.
        line = re.split(r"[;@]", line, maxsplit=1)[0].strip()
        line = re.sub(r"\[.*\]", "", line).strip()
        match = re.match(r"^([A-Za-z0-9][A-Za-z0-9._-]*)", line)
        if match:
            names.append(normalize_dist_name(match.group(1)))
    return names
|
|
|
|
|
|
def default_venv_dir() -> Path:
    """Return the default virtualenv path for this user and working directory.

    On platforms that provide ``os.getuid`` the result is
    ``/tmp/export-api-docs-<uid>-<hash>``, where ``<hash>`` is the first 12 hex
    digits of the SHA-256 of the resolved current working directory. Where
    ``os.getuid`` is unavailable (Windows) the system temporary directory and
    the ``USERNAME`` environment variable are used instead, so the function no
    longer raises ``AttributeError`` there.
    """
    import tempfile

    digest = hashlib.sha256(str(Path.cwd().resolve()).encode()).hexdigest()[:12]
    if hasattr(os, "getuid"):
        return Path("/tmp") / f"export-api-docs-{os.getuid()}-{digest}"

    # Keep the per-user component filesystem-safe whatever the account name is.
    owner = re.sub(r"[^\w.-]", "_", os.environ.get("USERNAME", "")) or "user"
    return Path(tempfile.gettempdir()) / f"export-api-docs-{owner}-{digest}"
|
|
|
|
|
|
def venv_python(venv: Path) -> Path:
    """Return the path of the Python interpreter inside virtualenv *venv*."""
    relative = ("Scripts", "python.exe") if os.name == "nt" else ("bin", "python")
    return venv.joinpath(*relative)
|
|
|
|
|
|
def venv_executable(venv: Path, name: str) -> Path:
    """Return the path of the script *name* installed in virtualenv *venv*.

    On Windows the script lives in ``Scripts`` and carries an ``.exe`` suffix;
    elsewhere it lives in ``bin`` under its plain name.
    """
    on_windows = os.name == "nt"
    scripts_dir = venv / ("Scripts" if on_windows else "bin")
    file_name = f"{name}.exe" if on_windows else name
    return scripts_dir / file_name
|
|
|
|
|
|
def run(cmd: list[str], **kwargs) -> None:
    """Run *cmd* and raise ``subprocess.CalledProcessError`` on a non-zero exit.

    Extra keyword arguments are forwarded to ``subprocess.run``.
    """
    options = dict(check=True, **kwargs)
    subprocess.run(cmd, **options)
|
|
|
|
|
|
def check_venv_module() -> None:
    """Exit with status 1 unless ``python -m venv --help`` succeeds.

    The probe runs under the current interpreter with its output captured; on
    failure an error naming the cause is written to stderr before exiting.
    """
    probe = [sys.executable, "-m", "venv", "--help"]
    try:
        subprocess.run(probe, check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        message = f"error: python venv module unavailable (install python3-venv): {exc}"
        print(message, file=sys.stderr)
        sys.exit(1)
|
|
|
|
|
|
def collect_requirements_files(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[Path]:
    """Return all requirements files reachable via includes from path (in stable order).

    Includes are recognised in every spelling pip accepts for the option
    (``-r file``, ``-rfile``, ``--requirement file`` and
    ``--requirement=file``), so the returned set matches the files pip reads.
    Includes that resolve outside *anchor_dir* or that do not exist are
    silently left out.

    Args:
        path: Requirements file to start from; it is always the first entry.
        seen: Resolved paths already visited; updated in place so include
            cycles terminate. A fresh set is used when omitted.
        anchor_dir: Directory that includes must stay under; defaults to the
            directory containing *path*.

    Returns:
        Resolved paths in depth-first order of first appearance.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    if anchor_dir is None:
        anchor_dir = path.parent.resolve()
    if path in seen:
        return []
    seen.add(path)

    files = [path]
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.split("#", 1)[0].strip()
        # The target may be glued to the short option or joined to the long
        # option with "="; lines without a target do not match at all.
        include_match = re.match(r"(?:--requirement(?:\s+|=)|-r\s*)(\S.*)$", line)
        if not include_match:
            continue
        include = (path.parent / include_match.group(1).strip()).resolve()
        try:
            include.relative_to(anchor_dir)
        except ValueError:
            continue
        if include.is_file():
            files.extend(collect_requirements_files(include, seen, anchor_dir))
    return files
|
|
|
|
|
|
def requirements_tree_digest(root: Path) -> str:
    """Return a SHA-256 hex digest over *root* and every file it includes.

    The files are visited in sorted path order; each contributes its path
    string followed by its raw bytes.
    """
    hasher = hashlib.sha256()
    reachable = collect_requirements_files(root, anchor_dir=root.parent.resolve())
    reachable.sort()
    for member in reachable:
        hasher.update(str(member).encode())
        hasher.update(member.read_bytes())
    return hasher.hexdigest()
|
|
|
|
|
|
def venv_needs_install(venv: Path, requirements: Path, recreate: bool) -> bool:
    """Tell whether packages must be (re)installed into *venv*.

    True when *recreate* is set, when *venv* is not a directory, when the stamp
    file is missing, or when the stamped digest differs from the current digest
    of the requirements tree.
    """
    stale = recreate or not venv.is_dir()
    if not stale:
        stamp = venv / REQUIREMENTS_HASH_FILE
        stale = (
            not stamp.is_file()
            or stamp.read_text(encoding="utf-8").strip() != requirements_tree_digest(requirements)
        )
    return stale
|
|
|
|
|
|
def ensure_venv(venv: Path, requirements: Path, recreate: bool) -> Path:
    """Create or refresh the virtualenv and return the path of its interpreter.

    Steps, in order:

    1. Exit early if the ``venv`` module is unavailable.
    2. Remove *venv* first when *recreate* is set.
    3. Run ``python -m venv`` whenever the interpreter inside *venv* is missing.
    4. Install pip tooling, pydoc-markdown and *requirements* when the stamp
       file is missing or stale, then rewrite the stamp.
    5. Otherwise install pydoc-markdown alone if its launcher is missing.

    Args:
        venv: Virtualenv directory.
        requirements: Top-level requirements file to install.
        recreate: Delete any existing *venv* before doing anything else.

    Returns:
        Path of the Python interpreter inside *venv*.

    Raises:
        subprocess.CalledProcessError: If creating the virtualenv or any pip
            invocation fails.
    """
    check_venv_module()
    py = venv_python(venv)
    if recreate and venv.exists():
        shutil.rmtree(venv)

    # Key on the interpreter, not on the directory: a leftover or half-deleted
    # directory without an interpreter used to skip creation and then fail on
    # the first pip call. ``python -m venv`` can populate an existing directory.
    if not py.is_file():
        print(f"Creating virtualenv: {venv}", file=sys.stderr)
        run([sys.executable, "-m", "venv", str(venv)])

    if venv_needs_install(venv, requirements, recreate=False):
        print("Installing packages into temporary virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", "-U", "pip", "setuptools", "wheel"])
        run(
            [
                str(py),
                "-m",
                "pip",
                "install",
                "-q",
                PYDOC_MARKDOWN_SPEC,
                "-r",
                str(requirements.resolve()),
            ],
        )
        # Written only after a successful install, so a failed run is retried.
        (venv / REQUIREMENTS_HASH_FILE).write_text(
            requirements_tree_digest(requirements),
            encoding="utf-8",
        )
    elif not venv_executable(venv, "pydoc-markdown").is_file():
        print("Installing pydoc-markdown into virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", PYDOC_MARKDOWN_SPEC])

    return py
|
|
|
|
|
|
def site_packages(py: Path) -> Path:
    """Return a site-packages directory reported by the interpreter *py*.

    A short script is run with *py*. It takes the first entry of
    ``site.getsitepackages()``; if that list is empty it falls back to
    ``sys.path`` entries ending in ``site-packages``, then to
    ``<prefix>/lib/pythonX.Y/site-packages`` when that directory exists.

    Raises:
        subprocess.CalledProcessError: If the script exits non-zero, which
            includes the case where no directory was found.
    """
    # Executed in the target interpreter via ``-c``; it prints exactly one path.
    code = """
import site
import sys
from pathlib import Path

paths = site.getsitepackages()
if not paths:
    for entry in sys.path:
        if entry.endswith("site-packages"):
            paths.append(entry)
if not paths:
    ver = f"{sys.version_info.major}.{sys.version_info.minor}"
    guess = Path(sys.prefix) / "lib" / f"python{ver}" / "site-packages"
    if guess.is_dir():
        paths.append(str(guess))
if not paths:
    raise SystemExit("no site-packages directory found")
print(paths[0])
"""
    # Strip the trailing newline that print() adds before building the Path.
    out = subprocess.check_output([str(py), "-c", code], text=True).strip()
    return Path(out)
|
|
|
|
|
|
def is_safe_module_name(module: str) -> bool:
    """Tell whether *module* matches ``MODULE_RE`` in full."""
    return bool(MODULE_RE.fullmatch(module))
|
|
|
|
|
|
def distribution_version(dist: im.Distribution) -> str:
    """Return the version of *dist*, or ``"unknown"`` when it is empty or unavailable."""
    try:
        found = dist.version
    except (im.PackageNotFoundError, AttributeError):
        return "unknown"
    return found if found else "unknown"
|
|
|
|
|
|
def safe_metadata_value(value: str) -> str:
    """Return *value* made safe for embedding in an HTML comment.

    ASCII control characters are removed, surrounding whitespace is trimmed and
    every ``-->`` becomes ``-- >`` so the value cannot close the comment.

    Returns:
        The cleaned text, or ``"unknown"`` when nothing is left.
    """
    # Remove control characters before trimming and before the emptiness check:
    # a value made only of control characters must become "unknown" rather than
    # an empty string, and whitespace exposed by the removal must go too.
    cleaned = re.sub(r"[\x00-\x1f\x7f]", "", value).strip()
    if not cleaned:
        return "unknown"
    return cleaned.replace("-->", "-- >")
|
|
|
|
|
|
def safe_markdown_inline(value: str) -> str:
    """Return *value* cleaned by ``safe_metadata_value`` with ``\\``, `````` ` `````` and ``|`` backslash-escaped."""
    escapes = str.maketrans({"\\": "\\\\", "`": "\\`", "|": "\\|"})
    return safe_metadata_value(value).translate(escapes)
|
|
|
|
|
|
def safe_version_for_filename(version: str) -> str:
    """Return *version* as a filename-safe tag.

    The value is cleaned by ``safe_metadata_value``; then every character that
    is not a word character, ``.``, ``+`` or ``-`` is replaced by ``_``. An
    empty result becomes ``"unknown"``.
    """
    sanitized = safe_metadata_value(version)
    slug = re.sub(r"[^\w.+-]", "_", sanitized)
    return slug if slug else "unknown"
|
|
|
|
|
|
def export_doc_header(*, package_name: str, package_version: str, module: str) -> str:
    """Return the header text written at the top of an exported file.

    The header is an HTML comment holding the package, version and module,
    followed by a Markdown blockquote with the same data and a horizontal rule.
    """
    comment_fields = " ".join(
        (
            f"package={safe_metadata_value(package_name)}",
            f"version={safe_metadata_value(package_version)}",
            f"module={safe_metadata_value(module)}",
        )
    )
    shown_name = safe_markdown_inline(package_name)
    shown_version = safe_markdown_inline(package_version)
    shown_module = safe_markdown_inline(module)
    sections = [
        f"<!-- py-export-api-docs: {comment_fields} -->",
        f"> **Source:** `{shown_name}` **{shown_version}** (import: `{shown_module}`)",
        "---",
    ]
    # Each section, including the last, is followed by one blank line.
    return "".join(section + "\n\n" for section in sections)
|
|
|
|
|
|
def remove_stale_api_exports(out_dir: Path, keep: Path) -> None:
    """Delete every ``api-*.md`` and ``api.md`` in *out_dir* except *keep*."""
    for pattern in ("api-*.md", "api.md"):
        leftovers = (entry for entry in out_dir.glob(pattern) if entry != keep)
        for entry in leftovers:
            entry.unlink()
|
|
|
|
|
|
def safe_output_dir(docs_root: Path, module: str) -> Path | None:
    """Return the resolved output directory for *module*, or ``None`` if unsafe.

    ``None`` is returned when *module* fails ``is_safe_module_name`` or when the
    resolved ``docs_root / module`` does not lie under the resolved *docs_root*.
    """
    if not is_safe_module_name(module):
        return None
    root = docs_root.resolve()
    candidate = (docs_root / module).resolve()
    contained = candidate == root or root in candidate.parents
    return candidate if contained else None
|
|
|
|
|
|
def import_modules_for_distribution(dist: im.Distribution) -> list[str]:
    """Return the top-level import names to document for *dist*.

    Sources are tried in order, and the first that yields a name wins:

    1. ``top_level.txt`` entries that are safe module names and do not start
       with ``_`` (sorted, de-duplicated).
    2. ``PACKAGE_MODULE_FALLBACKS`` for the normalized distribution name.
    3. The normalized name with ``-`` replaced by ``_``, with a warning on
       stderr because the guess may be wrong.

    Returns:
        A new list on every call; empty when no safe name can be determined.
    """
    try:
        text = dist.read_text("top_level.txt")
    except (FileNotFoundError, OSError, TypeError):
        text = None
    if text:
        declared = {
            entry
            for entry in (line.strip() for line in text.splitlines())
            if entry and not entry.startswith("_") and is_safe_module_name(entry)
        }
        # Return only when something usable was declared. A top_level.txt
        # whose entries are all unsafe used to short-circuit to an empty list
        # and bypass the fallbacks below.
        if declared:
            return sorted(declared)

    key = normalize_dist_name(dist.metadata.get("Name", ""))
    if key in PACKAGE_MODULE_FALLBACKS:
        # Copy so callers cannot mutate the shared module-level table.
        return list(PACKAGE_MODULE_FALLBACKS[key])

    guess = key.replace("-", "_")
    if is_safe_module_name(guess):
        print(
            f"warning: guessing import module {guess!r} for {key!r} "
            "(no top_level.txt); may be wrong",
            file=sys.stderr,
        )
        return [guess]
    return []
|
|
|
|
|
|
def export_module(
    pydoc_markdown: Path,
    site_pkgs: Path,
    module: str,
    out_dir: Path,
    *,
    package_name: str,
    package_version: str,
    keep_old_versions: bool = False,
) -> Path:
    """Render *module* with pydoc-markdown into ``out_dir/api-<version>.md``.

    pydoc-markdown writes to a hidden temporary file. A second temporary file
    receives the header followed by that output and is then moved over the
    final name with ``os.replace``. Unless *keep_old_versions* is set, other
    ``api-*.md`` / ``api.md`` files in *out_dir* are removed afterwards. Both
    temporary files are deleted whether or not the export succeeds.

    Returns:
        Path of the written Markdown file.

    Raises:
        subprocess.CalledProcessError: If pydoc-markdown exits non-zero.
        RuntimeError: If pydoc-markdown produced no output.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / f"api-{safe_version_for_filename(package_version)}.md"
    preamble = export_doc_header(
        package_name=package_name,
        package_version=package_version,
        module=module,
    )
    raw_output = out_dir / f".{module}.pydoc.tmp"
    staged = out_dir / f".{module}.export.tmp"
    command = [str(pydoc_markdown), "-p", module, "-I", str(site_pkgs), "-q"]
    try:
        with raw_output.open("w", encoding="utf-8") as sink:
            subprocess.run(command, check=True, stdout=sink)
        if not raw_output.stat().st_size:
            raise RuntimeError(f"pydoc-markdown produced no output for {module!r}")
        with staged.open("w", encoding="utf-8") as sink:
            sink.write(preamble)
            with raw_output.open("r", encoding="utf-8") as rendered:
                shutil.copyfileobj(rendered, sink)
        # Swap the finished file into place in one step.
        os.replace(staged, target)
        if not keep_old_versions:
            remove_stale_api_exports(out_dir, target)
    finally:
        for scratch in (raw_output, staged):
            scratch.unlink(missing_ok=True)
    return target
|
|
|
|
|
|
def write_docs_index(docs_root: Path, exports: list[tuple[str, str, Path]]) -> None:
    """Write ``docs_root/README.md`` listing every exported module.

    Args:
        docs_root: Directory that holds the per-module export directories.
        exports: ``(module, version, path)`` triples, where ``path`` is the
            exported Markdown file somewhere under *docs_root*.

    Raises:
        ValueError: If an exported path does not lie under *docs_root* even
            after both have been resolved.
    """
    # Export directories are produced already resolved, so compare against the
    # resolved root as well; otherwise a symlinked docs/ directory makes
    # relative_to() raise ValueError.
    root = docs_root.resolve()
    lines = [
        "# API documentation (exported)",
        "",
        "Generated from `requirements.txt` via `py-export-api-docs.py`.",
        "",
        "| Module | Version | File |",
        "|--------|---------|------|",
    ]
    for module, version, api_md in sorted(exports):
        located = api_md.parent.resolve() / api_md.name
        # Markdown links need forward slashes whatever the OS separator is.
        rel = located.relative_to(root).as_posix()
        safe_version = safe_markdown_inline(version)
        lines.append(f"| `{module}` | `{safe_version}` | [{rel}]({rel}) |")
    (docs_root / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
|
|
|
|
def distribution_for_requirement_in_venv(
    name: str,
    site_pkgs: Path,
) -> im.Distribution | None:
    """Return the first distribution under *site_pkgs* whose normalized name equals *name*.

    Returns ``None`` when no such distribution exists. *name* is compared as
    given, so it should already be normalized.
    """
    installed = im.distributions(path=[str(site_pkgs)])
    matching = (
        candidate
        for candidate in installed
        if normalize_dist_name(candidate.metadata.get("Name", "")) == name
    )
    return next(matching, None)
|
|
|
|
|
|
def main() -> int:
    """Run the export end to end and return the process exit status.

    The requirements are installed into the virtualenv, every module of every
    named requirement is exported under ``./docs/<module>/``, and
    ``docs/README.md`` is written as an index. The status is 1 when the
    requirements file is missing, pydoc-markdown is unavailable, no names were
    parsed, nothing was exported, or ``--strict`` is set and something failed;
    otherwise it is 0.
    """
    args = parse_args()
    requirements = args.requirements.resolve()
    if not requirements.is_file():
        print(f"error: requirements file not found: {requirements}", file=sys.stderr)
        return 1

    venv = args.venv_dir or default_venv_dir()
    docs_root = Path.cwd() / "docs"
    docs_root.mkdir(parents=True, exist_ok=True)

    py = ensure_venv(venv, requirements, args.recreate_venv)
    pydoc_markdown = venv_executable(venv, "pydoc-markdown")
    if not pydoc_markdown.is_file():
        print(f"error: pydoc-markdown not found in {venv}", file=sys.stderr)
        return 1

    site_pkgs = site_packages(py)
    parsed = parse_requirements_file(requirements, anchor_dir=requirements.parent.resolve())
    # dict.fromkeys drops duplicates while keeping first-seen order.
    req_names = list(dict.fromkeys(parsed))
    if not req_names:
        print(
            "error: no package names found in requirements file "
            "(only name-based specs and -r includes are parsed)",
            file=sys.stderr,
        )
        return 1

    exports: list[tuple[str, str, Path]] = []
    seen_modules: set[str] = set()
    failures = 0

    for req_name in req_names:
        dist = distribution_for_requirement_in_venv(req_name, site_pkgs)
        if dist is None:
            print(f"warning: package not installed in venv, skipping: {req_name}", file=sys.stderr)
            failures += 1
            continue

        display_name = dist.metadata.get("Name", req_name)
        package_version = distribution_version(dist)
        modules = import_modules_for_distribution(dist)
        if not modules:
            print(
                f"warning: no safe import modules for {display_name}, skipping",
                file=sys.stderr,
            )
            failures += 1
            continue

        print(f"{display_name} -> {', '.join(modules)}", file=sys.stderr)

        for module in modules:
            # A module is exported once, for the first distribution that lists it.
            if module in seen_modules:
                continue
            seen_modules.add(module)

            out_dir = safe_output_dir(docs_root, module)
            if out_dir is None:
                print(f"warning: unsafe module name, skipping: {module!r}", file=sys.stderr)
                failures += 1
                continue

            try:
                api_md = export_module(
                    pydoc_markdown,
                    site_pkgs,
                    module,
                    out_dir,
                    package_name=display_name,
                    package_version=package_version,
                    keep_old_versions=args.keep_old_versions,
                )
            except (subprocess.CalledProcessError, RuntimeError, OSError) as exc:
                print(f"warning: failed to export {module}: {exc}", file=sys.stderr)
                failures += 1
                continue

            size_kb = api_md.stat().st_size // 1024
            print(f"  wrote {api_md} ({size_kb} KiB)", file=sys.stderr)
            exports.append((module, package_version, api_md))

    if not exports:
        print("error: no documentation was exported", file=sys.stderr)
        return 1

    write_docs_index(docs_root, exports)
    print(f"Done. Documentation in {docs_root.resolve()}", file=sys.stderr)

    if failures:
        if args.strict:
            print(f"error: {failures} package/module export failure(s) (--strict)", file=sys.stderr)
            return 1
        print(f"warning: {failures} package/module export failure(s)", file=sys.stderr)

    return 0
|
|
|
|
|
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|