#!/usr/bin/env python3
"""Export Markdown API docs for packages listed in requirements.txt.

Creates docs/<module>/api.md under the current working directory using a
temporary virtualenv in /tmp (does not install into the system Python).

Supported requirements lines: PEP 508 name specs and ``-r`` includes (must
stay under the directory of the top-level requirements file). Lines like
``-e``, direct URLs, and other pip options are installed by pip but not
exported.

Example:
  ./scripts/export-api-docs.py requirements.txt
  ./scripts/export-api-docs.py /path/to/requirements.txt --recreate-venv
  ./scripts/export-api-docs.py requirements.txt --strict
"""

from __future__ import annotations

import argparse
import hashlib
import importlib.metadata as im
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# Pin for reproducible exports (pydoc-markdown 4.x API).
PYDOC_MARKDOWN_SPEC = "pydoc-markdown>=4.0,<5"

# PyPI distribution name (normalized) -> import roots when metadata is missing.
PACKAGE_MODULE_FALLBACKS: dict[str, list[str]] = {
    "python-gitlab": ["gitlab"],
    "python-redmine": ["redminelib"],
    "pyyaml": ["yaml"],
    "pillow": ["PIL"],
}

MODULE_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
REQUIREMENTS_HASH_FILE = ".requirements.sha256"

# A line names a distribution only when its leading token is followed by
# something a name-based spec allows: end of line, extras, a version operator,
# a marker, a ``name @ url`` reference, or a per-requirement option.  Bare URLs
# ("https://...", "git+https://...") and paths ("dist/x.whl") therefore do not
# match and are not mistaken for packages called "https", "git" or "dist".
_REQ_NAME_RE = re.compile(
    r"^(?P<name>[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)"
    r"(?:\s*$|\s*[\[<>=!~;@(]|\s+-)"
)

# File suffixes that mark an installed file as importable code.
_MODULE_FILE_SUFFIXES = (".py", ".so", ".pyd")


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``requirements``, ``venv_dir``, ``recreate_venv`` and
        ``strict``.
    """
    parser = argparse.ArgumentParser(
        description="Export Markdown API docs for requirements.txt packages.",
    )
    parser.add_argument(
        "requirements",
        type=Path,
        help="Path to requirements.txt",
    )
    parser.add_argument(
        "--venv-dir",
        type=Path,
        default=None,
        help="Virtualenv location (default: /tmp/export-api-docs-<uid>-<cwd-hash>)",
    )
    parser.add_argument(
        "--recreate-venv",
        action="store_true",
        help="Remove and recreate the temporary virtualenv",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with failure if any package or module export fails",
    )
    return parser.parse_args(argv)


def normalize_dist_name(name: str) -> str:
    """Return *name* lower-cased with runs of ``-``, ``_`` and ``.`` folded to ``-``."""
    return re.sub(r"[-_.]+", "-", name).lower()


def _include_target(line: str) -> str | None:
    """Return the file named by a ``-r``/``--requirement`` line, else ``None``.

    Accepts ``-r file``, ``-rfile``, ``--requirement file`` and
    ``--requirement=file``.  *line* must already be stripped of comments and
    surrounding whitespace.
    """
    if line.startswith("--requirement"):
        rest = line[len("--requirement"):]
        # Reject look-alike options such as ``--requirements-foo``.
        if rest and rest[0] not in "= \t":
            return None
        rest = rest.lstrip()
        if rest.startswith("="):
            rest = rest[1:]
    elif line.startswith("-r"):
        rest = line[2:]
    else:
        return None
    return rest.strip() or None


def parse_requirements_file(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[str]:
    """Return normalized PyPI distribution names from a requirements file.

    Args:
        path: Requirements file to read.
        seen: Files already visited; used to break ``-r`` cycles.
        anchor_dir: Directory that every ``-r`` include must stay inside.
            Defaults to the directory containing *path*.

    Returns:
        Names in file order (duplicates are kept).  Includes are expanded in
        place.  Option lines, bare URLs and local paths contribute nothing.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    anchor_dir = (path.parent if anchor_dir is None else anchor_dir).resolve()
    if path in seen:
        return []
    seen.add(path)

    names: list[str] = []
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.split("#", 1)[0].strip()
        if not line:
            continue

        target = _include_target(line)
        if target is not None:
            include = (path.parent / target).resolve()
            try:
                include.relative_to(anchor_dir)
            except ValueError:
                print(
                    f"warning: skipping -r outside requirements tree: {include}",
                    file=sys.stderr,
                )
                continue
            if not include.is_file():
                print(f"warning: requirements include not found: {include}", file=sys.stderr)
                continue
            names.extend(parse_requirements_file(include, seen, anchor_dir))
            continue

        if line.startswith("-"):
            # Any other pip option (-e, --index-url, ...): pip handles it.
            continue

        match = _REQ_NAME_RE.match(line)
        if match:
            names.append(normalize_dist_name(match.group("name")))
    return names


def default_venv_dir() -> Path:
    """Return the per-user, per-working-directory default virtualenv path."""
    digest = hashlib.sha256(str(Path.cwd().resolve()).encode()).hexdigest()[:12]
    if hasattr(os, "getuid"):
        return Path("/tmp") / f"export-api-docs-{os.getuid()}-{digest}"
    # Windows has neither os.getuid() nor /tmp.
    user = re.sub(r"[^A-Za-z0-9_.-]", "_", os.environ.get("USERNAME") or "user")
    return Path(tempfile.gettempdir()) / f"export-api-docs-{user}-{digest}"


def venv_python(venv: Path) -> Path:
    """Return the path of the Python interpreter inside *venv*."""
    if os.name == "nt":
        return venv / "Scripts" / "python.exe"
    return venv / "bin" / "python"


def venv_executable(venv: Path, name: str) -> Path:
    """Return the path of the console script *name* inside *venv*."""
    if os.name == "nt":
        return venv / "Scripts" / f"{name}.exe"
    return venv / "bin" / name


def run(cmd: list[str], **kwargs) -> None:
    """Run *cmd*, raising ``subprocess.CalledProcessError`` on a non-zero exit."""
    subprocess.run(cmd, check=True, **kwargs)


def check_venv_module() -> None:
    """Exit with status 1 if ``python -m venv`` is unavailable."""
    try:
        subprocess.run(
            [sys.executable, "-m", "venv", "--help"],
            check=True,
            capture_output=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        print(
            "error: python venv module unavailable "
            f"(install python3-venv): {exc}",
            file=sys.stderr,
        )
        sys.exit(1)


def collect_requirements_files(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[Path]:
    """Return all requirements files reachable via -r from path (in stable order).

    Applies the same include rules as ``parse_requirements_file`` but stays
    silent about skipped includes.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    anchor_dir = (path.parent if anchor_dir is None else anchor_dir).resolve()
    if path in seen:
        return []
    seen.add(path)

    files = [path]
    for raw in path.read_text(encoding="utf-8").splitlines():
        target = _include_target(raw.split("#", 1)[0].strip())
        if target is None:
            continue
        include = (path.parent / target).resolve()
        try:
            include.relative_to(anchor_dir)
        except ValueError:
            continue
        if include.is_file():
            files.extend(collect_requirements_files(include, seen, anchor_dir))
    return files


def requirements_tree_digest(root: Path) -> str:
    """Return a SHA-256 hex digest over the paths and bytes of *root* and its includes."""
    digest = hashlib.sha256()
    anchor = root.parent.resolve()
    for req_file in sorted(collect_requirements_files(root, anchor_dir=anchor)):
        digest.update(str(req_file).encode())
        digest.update(req_file.read_bytes())
    return digest.hexdigest()


def venv_needs_install(venv: Path, requirements: Path, recreate: bool) -> bool:
    """Return True when *venv* is missing, unstamped, or stamped for other requirements."""
    if recreate or not venv.is_dir():
        return True
    stamp = venv / REQUIREMENTS_HASH_FILE
    if not stamp.is_file():
        return True
    return stamp.read_text(encoding="utf-8").strip() != requirements_tree_digest(requirements)


def ensure_venv(venv: Path, requirements: Path, recreate: bool) -> Path:
    """Create or refresh the virtualenv and return its interpreter path.

    Args:
        venv: Virtualenv directory.
        requirements: Top-level requirements file to install.
        recreate: Remove an existing *venv* first.

    Raises:
        subprocess.CalledProcessError: venv creation or a pip install failed.
        OSError: the directory could not be removed or written.
    """
    check_venv_module()
    py = venv_python(venv)
    if recreate and venv.exists():
        shutil.rmtree(venv)

    created = False
    if not py.is_file():
        # Covers a missing directory and a half-built venv from an
        # interrupted run (directory present, interpreter absent).
        print(f"Creating virtualenv: {venv}", file=sys.stderr)
        cmd = [sys.executable, "-m", "venv"]
        if (venv / "pyvenv.cfg").is_file():
            # Only wipe directories that are verifiably a venv.
            cmd.append("--clear")
        cmd.append(str(venv))
        run(cmd)
        created = True

    # A freshly built venv is empty even if an old stamp file says otherwise.
    if created or venv_needs_install(venv, requirements, recreate=False):
        print("Installing packages into temporary virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", "-U", "pip", "setuptools", "wheel"])
        run(
            [
                str(py),
                "-m",
                "pip",
                "install",
                "-q",
                PYDOC_MARKDOWN_SPEC,
                "-r",
                str(requirements.resolve()),
            ],
        )
        # Written only after a successful install so a failed run retries.
        (venv / REQUIREMENTS_HASH_FILE).write_text(
            requirements_tree_digest(requirements),
            encoding="utf-8",
        )
    elif not venv_executable(venv, "pydoc-markdown").is_file():
        print("Installing pydoc-markdown into virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", PYDOC_MARKDOWN_SPEC])
    return py


def site_packages(py: Path) -> Path:
    """Return the pure-Python site-packages directory of interpreter *py*.

    Asks the interpreter itself through ``sysconfig`` because
    ``site.getsitepackages()[0]`` is the prefix, not site-packages, on Windows.

    Raises:
        subprocess.CalledProcessError: the interpreter reported no directory.
    """
    code = """
import sysconfig

purelib = sysconfig.get_paths().get("purelib")
if not purelib:
    raise SystemExit("no site-packages directory found")
print(purelib)
"""
    out = subprocess.check_output([str(py), "-c", code], text=True).strip()
    return Path(out)


def is_safe_module_name(module: str) -> bool:
    """Return True if *module* is a single plain identifier (no dots or separators)."""
    return MODULE_RE.fullmatch(module) is not None


def safe_output_dir(docs_root: Path, module: str) -> Path | None:
    """Return the resolved output directory for *module*, or None if unsafe.

    Unsafe means the name is not a plain identifier or the resolved directory
    falls outside *docs_root*.
    """
    if not is_safe_module_name(module):
        return None
    docs_resolved = docs_root.resolve()
    out_dir = (docs_root / module).resolve()
    try:
        out_dir.relative_to(docs_resolved)
    except ValueError:
        return None
    return out_dir


def _modules_from_files(dist: im.Distribution) -> list[str]:
    """Infer public top-level import names from the files *dist* installed."""
    roots: set[str] = set()
    for entry in getattr(dist, "files", None) or ():
        if entry.suffix not in _MODULE_FILE_SUFFIXES:
            continue
        parts = entry.parts
        # "pkg/mod.py" -> "pkg"; "six.py" or "ext.cpython-311.so" -> stem.
        root = parts[0] if len(parts) > 1 else entry.name.split(".", 1)[0]
        if not root.startswith("_") and is_safe_module_name(root):
            roots.add(root)
    return sorted(roots)


def import_modules_for_distribution(dist: im.Distribution) -> list[str]:
    """Return the top-level import names to document for *dist*.

    Sources are tried in order and the first non-empty one wins:
    ``top_level.txt``, ``PACKAGE_MODULE_FALLBACKS``, the installed file list,
    and finally a guess derived from the distribution name (with a warning).
    Names starting with ``_`` and names that are not plain identifiers are
    dropped.
    """
    try:
        text = dist.read_text("top_level.txt")
    except (FileNotFoundError, OSError, TypeError):
        text = None
    if text:
        declared = {line.strip() for line in text.splitlines()}
        safe = sorted(
            m for m in declared
            if m and not m.startswith("_") and is_safe_module_name(m)
        )
        if safe:
            return safe

    key = normalize_dist_name(dist.metadata.get("Name", "") or "")
    if key in PACKAGE_MODULE_FALLBACKS:
        # Copy so callers cannot mutate the module-level table.
        return list(PACKAGE_MODULE_FALLBACKS[key])

    inferred = _modules_from_files(dist)
    if inferred:
        return inferred

    guess = key.replace("-", "_")
    if is_safe_module_name(guess):
        print(
            f"warning: guessing import module {guess!r} for {key!r} "
            "(no top_level.txt); may be wrong",
            file=sys.stderr,
        )
        return [guess]
    return []


def export_module(
    pydoc_markdown: Path,
    site_pkgs: Path,
    module: str,
    out_dir: Path,
) -> Path:
    """Render *module* to ``out_dir/api.md`` and return that path.

    Output goes to a temporary sibling file and is moved into place only on
    success, so a failed run neither leaves an empty ``api.md`` nor destroys
    one from an earlier run.

    Raises:
        subprocess.CalledProcessError: pydoc-markdown exited non-zero.
        FileNotFoundError: the executable is missing or produced no output.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    api_md = out_dir / "api.md"
    partial = out_dir / "api.md.tmp"
    try:
        with partial.open("w", encoding="utf-8") as handle:
            run(
                [
                    str(pydoc_markdown),
                    "-p",
                    module,
                    "-I",
                    str(site_pkgs),
                    "-q",
                ],
                stdout=handle,
            )
        if partial.stat().st_size == 0:
            raise FileNotFoundError(f"Expected non-empty output file: {api_md}")
        os.replace(partial, api_md)
    finally:
        # Already gone after a successful replace; removes leftovers otherwise.
        partial.unlink(missing_ok=True)
    return api_md


def write_docs_index(docs_root: Path, exports: list[tuple[str, Path]]) -> None:
    """Write ``docs_root/README.md`` with a table linking every exported module.

    Args:
        docs_root: Documentation root directory.
        exports: ``(module, api_md_path)`` pairs.
    """
    root = docs_root.resolve()
    lines = [
        "# API documentation (exported)",
        "",
        "Generated from `requirements.txt` via `scripts/export-api-docs.py`.",
        "",
        "| Module | File |",
        "|--------|------|",
    ]
    for module, api_md in sorted(exports):
        # Compare resolved paths so a symlinked docs directory does not break
        # relative_to(); POSIX separators keep the links valid on Windows.
        try:
            rel = api_md.resolve().relative_to(root).as_posix()
        except ValueError:
            rel = api_md.as_posix()
        lines.append(f"| `{module}` | [{rel}]({rel}) |")
    (docs_root / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def distribution_for_requirement_in_venv(
    name: str,
    site_pkgs: Path,
) -> im.Distribution | None:
    """Return the distribution in *site_pkgs* whose normalized name is *name*, or None."""
    for dist in im.distributions(path=[str(site_pkgs)]):
        meta_name = dist.metadata.get("Name", "") or ""
        if normalize_dist_name(meta_name) == name:
            return dist
    return None


def main(argv: list[str] | None = None) -> int:
    """Run the export and return a process exit status.

    Args:
        argv: Argument list; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 on success; 1 when nothing could be exported, the environment could
        not be prepared, or ``--strict`` is set and any export failed.
    """
    args = parse_args(argv)
    requirements = args.requirements.resolve()
    if not requirements.is_file():
        print(f"error: requirements file not found: {requirements}", file=sys.stderr)
        return 1

    venv = args.venv_dir or default_venv_dir()
    docs_root = Path.cwd() / "docs"
    docs_root.mkdir(parents=True, exist_ok=True)

    try:
        py = ensure_venv(venv, requirements, args.recreate_venv)
        pydoc_markdown = venv_executable(venv, "pydoc-markdown")
        if not pydoc_markdown.is_file():
            print(f"error: pydoc-markdown not found in {venv}", file=sys.stderr)
            return 1
        site_pkgs = site_packages(py)
    except (subprocess.CalledProcessError, OSError) as exc:
        # pip/venv failures are expected operational errors, not bugs.
        print(f"error: could not prepare virtualenv {venv}: {exc}", file=sys.stderr)
        return 1

    anchor_dir = requirements.parent.resolve()
    req_names = list(dict.fromkeys(parse_requirements_file(requirements, anchor_dir=anchor_dir)))
    if not req_names:
        print(
            "error: no package names found in requirements file "
            "(only name-based specs and -r includes are parsed)",
            file=sys.stderr,
        )
        return 1

    exports: list[tuple[str, Path]] = []
    seen_modules: set[str] = set()
    failures = 0

    for req_name in req_names:
        dist = distribution_for_requirement_in_venv(req_name, site_pkgs)
        if dist is None:
            print(f"warning: package not installed in venv, skipping: {req_name}", file=sys.stderr)
            failures += 1
            continue

        display_name = dist.metadata.get("Name", req_name)
        modules = import_modules_for_distribution(dist)
        if not modules:
            print(
                f"warning: no safe import modules for {display_name}, skipping",
                file=sys.stderr,
            )
            failures += 1
            continue

        print(f"{display_name} -> {', '.join(modules)}", file=sys.stderr)
        for module in modules:
            # Two distributions may ship the same top-level module.
            if module in seen_modules:
                continue
            seen_modules.add(module)

            out_dir = safe_output_dir(docs_root, module)
            if out_dir is None:
                print(f"warning: unsafe module name, skipping: {module!r}", file=sys.stderr)
                failures += 1
                continue

            try:
                api_md = export_module(pydoc_markdown, site_pkgs, module, out_dir)
            except (subprocess.CalledProcessError, OSError) as exc:
                print(f"warning: failed to export {module}: {exc}", file=sys.stderr)
                failures += 1
                continue

            size_kb = api_md.stat().st_size // 1024
            print(f"  wrote {api_md} ({size_kb} KiB)", file=sys.stderr)
            exports.append((module, api_md))

    if exports:
        write_docs_index(docs_root, exports)
        print(f"Done. Documentation in {docs_root.resolve()}", file=sys.stderr)

    if not exports:
        print("error: no documentation was exported", file=sys.stderr)
        return 1
    if args.strict and failures:
        print(f"error: {failures} package/module export failure(s) (--strict)", file=sys.stderr)
        return 1
    if failures:
        print(f"warning: {failures} package/module export failure(s)", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())