#!/usr/bin/env python3
"""Export Markdown API docs for packages listed in requirements.txt.

Creates docs/<module>/api-<version>.md under the current working directory
using a temporary virtualenv in /tmp (does not install into the system Python).

Supported requirements lines: PEP 508 name specs and ``-r`` / ``--requirement``
includes (must stay under the directory of the top-level requirements file).
Lines like ``-e``, direct URLs, and other pip options are installed by pip but
not exported.

Example:
  ./scripts/export-api-docs.py requirements.txt
  ./scripts/export-api-docs.py /path/to/requirements.txt --recreate-venv
  ./scripts/export-api-docs.py requirements.txt --strict
"""

from __future__ import annotations

import argparse
import hashlib
import importlib.metadata as im
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# Pin for reproducible exports (pydoc-markdown 4.x API).
PYDOC_MARKDOWN_SPEC = "pydoc-markdown>=4.0,<5"

# PyPI distribution name (normalized) -> import roots when metadata is missing.
PACKAGE_MODULE_FALLBACKS: dict[str, list[str]] = {
    "python-gitlab": ["gitlab"],
    "python-redmine": ["redminelib"],
    "pyyaml": ["yaml"],
    "pillow": ["PIL"],
}

MODULE_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
REQUIREMENTS_HASH_FILE = ".requirements.sha256"

# Executed by the virtualenv interpreter to print its site-packages directory.
_SITE_PACKAGES_PROBE = """
import site
import sys
from pathlib import Path

candidates = site.getsitepackages()
# On Windows the first entry is the bare prefix; prefer real site-packages dirs.
paths = [p for p in candidates if Path(p).name == "site-packages"] or candidates
if not paths:
    for entry in sys.path:
        if entry.endswith("site-packages"):
            paths.append(entry)
if not paths:
    ver = f"{sys.version_info.major}.{sys.version_info.minor}"
    guess = Path(sys.prefix) / "lib" / f"python{ver}" / "site-packages"
    if guess.is_dir():
        paths.append(str(guess))
if not paths:
    raise SystemExit("no site-packages directory found")
print(paths[0])
"""


def parse_args() -> argparse.Namespace:
    """Parse the command line and return the populated namespace."""
    parser = argparse.ArgumentParser(
        description="Export Markdown API docs for requirements.txt packages.",
    )
    parser.add_argument(
        "requirements",
        type=Path,
        help="Path to requirements.txt",
    )
    parser.add_argument(
        "--venv-dir",
        type=Path,
        default=None,
        help="Virtualenv location (default: /tmp/export-api-docs-<uid>-<hash>)",
    )
    parser.add_argument(
        "--recreate-venv",
        action="store_true",
        help="Remove and recreate the temporary virtualenv",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit with failure if any package or module export fails",
    )
    parser.add_argument(
        "--keep-old-versions",
        action="store_true",
        help=(
            "Keep previous api-<version>.md and legacy api.md files "
            "in each module directory"
        ),
    )
    return parser.parse_args()


def normalize_dist_name(name: str) -> str:
    """Lower-case *name* and collapse runs of ``-``, ``_`` and ``.`` into ``-``."""
    return re.sub(r"[-_.]+", "-", name).lower()


def _requirement_lines(path: Path) -> list[str]:
    """Return the non-empty lines of *path* with ``#`` comments stripped."""
    stripped = (
        raw.split("#", 1)[0].strip()
        for raw in path.read_text(encoding="utf-8").splitlines()
    )
    return [line for line in stripped if line]


def _requirements_include_target(line: str) -> str | None:
    """Return the file named by a ``-r`` / ``--requirement`` line, else ``None``.

    Accepts ``-r FILE``, ``-rFILE``, ``--requirement FILE`` and
    ``--requirement=FILE``.
    """
    if line.startswith("--requirement"):
        rest = line[len("--requirement"):]
        if rest.startswith("="):
            rest = rest[1:]
        elif rest and not rest[0].isspace():
            # Some other long option that merely shares the prefix.
            return None
    elif line.startswith("-r"):
        rest = line[len("-r"):]
    else:
        return None
    return rest.strip() or None


def parse_requirements_file(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[str]:
    """Return normalized PyPI distribution names from a requirements file.

    Includes are followed recursively as long as they resolve to a location
    under *anchor_dir* (default: the directory of *path*); *seen* guards
    against include cycles. Other pip options and direct-URL requirements are
    skipped. Names are returned in file order and may contain duplicates.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    if anchor_dir is None:
        anchor_dir = path.parent.resolve()
    if path in seen:
        return []
    seen.add(path)

    names: list[str] = []
    for line in _requirement_lines(path):
        target = _requirements_include_target(line)
        if target is not None:
            include = (path.parent / target).resolve()
            try:
                include.relative_to(anchor_dir)
            except ValueError:
                print(
                    f"warning: skipping -r outside requirements tree: {include}",
                    file=sys.stderr,
                )
                continue
            if not include.is_file():
                print(
                    f"warning: requirements include not found: {include}",
                    file=sys.stderr,
                )
                continue
            names.extend(parse_requirements_file(include, seen, anchor_dir))
            continue
        if line.startswith("-"):
            continue
        # Drop environment markers and the URL half of "name @ url".
        spec = re.split(r"[;@]", line, maxsplit=1)[0].strip()
        if "://" in spec:
            # Bare direct URL: pip installs it, but there is no name to export.
            continue
        spec = re.sub(r"\[.*\]", "", spec).strip()
        match = re.match(r"^([A-Za-z0-9][A-Za-z0-9._-]*)", spec)
        if match:
            names.append(normalize_dist_name(match.group(1)))
    return names


def default_venv_dir() -> Path:
    """Return the default virtualenv path, keyed by user and working directory."""
    digest = hashlib.sha256(str(Path.cwd().resolve()).encode()).hexdigest()[:12]
    if hasattr(os, "getuid"):
        owner = str(os.getuid())
        base = Path("/tmp")
    else:
        # os.getuid() does not exist on Windows; key on the user name instead.
        owner = re.sub(r"[^\w.-]", "_", os.environ.get("USERNAME", "user"))
        base = Path(tempfile.gettempdir())
    return base / f"export-api-docs-{owner}-{digest}"


def venv_python(venv: Path) -> Path:
    """Return the path of the Python interpreter inside *venv*."""
    if os.name == "nt":
        return venv / "Scripts" / "python.exe"
    return venv / "bin" / "python"


def venv_executable(venv: Path, name: str) -> Path:
    """Return the path of the console script *name* inside *venv*."""
    if os.name == "nt":
        return venv / "Scripts" / f"{name}.exe"
    return venv / "bin" / name


def run(cmd: list[str], **kwargs) -> None:
    """Run *cmd*, raising ``subprocess.CalledProcessError`` on non-zero exit."""
    subprocess.run(cmd, check=True, **kwargs)


def check_venv_module() -> None:
    """Exit with status 1 if ``python -m venv`` cannot be run."""
    try:
        subprocess.run(
            [sys.executable, "-m", "venv", "--help"],
            check=True,
            capture_output=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        print(
            "error: python venv module unavailable "
            f"(install python3-venv): {exc}",
            file=sys.stderr,
        )
        sys.exit(1)


def collect_requirements_files(
    path: Path,
    seen: set[Path] | None = None,
    anchor_dir: Path | None = None,
) -> list[Path]:
    """Return all requirements files reachable via -r from path (in stable order).

    Applies the same include rules as ``parse_requirements_file`` but silently
    ignores includes that are missing or outside *anchor_dir*.
    """
    path = path.resolve()
    if seen is None:
        seen = set()
    if anchor_dir is None:
        anchor_dir = path.parent.resolve()
    if path in seen:
        return []
    seen.add(path)

    files = [path]
    for line in _requirement_lines(path):
        target = _requirements_include_target(line)
        if target is None:
            continue
        include = (path.parent / target).resolve()
        try:
            include.relative_to(anchor_dir)
        except ValueError:
            continue
        if include.is_file():
            files.extend(collect_requirements_files(include, seen, anchor_dir))
    return files


def requirements_tree_digest(root: Path) -> str:
    """Return a SHA-256 hex digest over the paths and contents of the tree at *root*."""
    digest = hashlib.sha256()
    anchor = root.parent.resolve()
    for req_file in sorted(collect_requirements_files(root, anchor_dir=anchor)):
        digest.update(str(req_file).encode())
        digest.update(req_file.read_bytes())
    return digest.hexdigest()


def venv_needs_install(venv: Path, requirements: Path, recreate: bool) -> bool:
    """Return True when *venv* is absent, unstamped, or stamped for other requirements."""
    if recreate or not venv.is_dir():
        return True
    stamp = venv / REQUIREMENTS_HASH_FILE
    if not stamp.is_file():
        return True
    recorded = stamp.read_text(encoding="utf-8").strip()
    return recorded != requirements_tree_digest(requirements)


def ensure_venv(venv: Path, requirements: Path, recreate: bool) -> Path:
    """Create and populate *venv* as needed and return its Python interpreter.

    Packages are (re)installed when the stored requirements digest is missing
    or stale; the digest is written only after a successful install. Raises
    ``subprocess.CalledProcessError`` if venv creation or pip fails.
    """
    check_venv_module()
    py = venv_python(venv)
    if recreate and venv.exists():
        shutil.rmtree(venv)
    if not venv.exists():
        print(f"Creating virtualenv: {venv}", file=sys.stderr)
        run([sys.executable, "-m", "venv", str(venv)])
    # A freshly created venv has no stamp, so recreate is already accounted for.
    if venv_needs_install(venv, requirements, recreate=False):
        print("Installing packages into temporary virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", "-U", "pip", "setuptools", "wheel"])
        run(
            [
                str(py),
                "-m",
                "pip",
                "install",
                "-q",
                PYDOC_MARKDOWN_SPEC,
                "-r",
                str(requirements.resolve()),
            ],
        )
        (venv / REQUIREMENTS_HASH_FILE).write_text(
            requirements_tree_digest(requirements),
            encoding="utf-8",
        )
    elif not venv_executable(venv, "pydoc-markdown").is_file():
        print("Installing pydoc-markdown into virtualenv…", file=sys.stderr)
        run([str(py), "-m", "pip", "install", "-q", PYDOC_MARKDOWN_SPEC])
    return py


def site_packages(py: Path) -> Path:
    """Return the site-packages directory reported by the interpreter *py*.

    Raises ``subprocess.CalledProcessError`` if the probe script fails.
    """
    out = subprocess.check_output(
        [str(py), "-c", _SITE_PACKAGES_PROBE],
        text=True,
    ).strip()
    return Path(out)


def is_safe_module_name(module: str) -> bool:
    """Return True if *module* is a single plain identifier (no dots or separators)."""
    return MODULE_RE.fullmatch(module) is not None


def distribution_version(dist: im.Distribution) -> str:
    """Return the version of *dist*, or ``"unknown"`` when unavailable."""
    try:
        version = dist.version
        if version:
            return version
    except (im.PackageNotFoundError, AttributeError, KeyError):
        # KeyError: metadata without a Version field.
        pass
    return "unknown"


def safe_metadata_value(value: str) -> str:
    """Strip *value*, drop control characters, and break up any ``-->`` sequence."""
    cleaned = value.strip()
    if not cleaned:
        return "unknown"
    cleaned = re.sub(r"[\x00-\x1f\x7f]", "", cleaned)
    return cleaned.replace("-->", "-- >")


def safe_markdown_inline(value: str) -> str:
    """Return *value* sanitized and with backslash, backtick and pipe escaped."""
    return (
        safe_metadata_value(value)
        .replace("\\", "\\\\")
        .replace("`", "\\`")
        .replace("|", "\\|")
    )


def safe_version_for_filename(version: str) -> str:
    """Return *version* with every character outside ``[\\w.+-]`` replaced by ``_``."""
    cleaned = re.sub(r"[^\w.+-]", "_", safe_metadata_value(version))
    return cleaned or "unknown"


def export_doc_header(*, package_name: str, package_version: str, module: str) -> str:
    """Return the Markdown preamble written before the pydoc-markdown output."""
    name = safe_markdown_inline(package_name)
    version = safe_markdown_inline(package_version)
    module_name = safe_markdown_inline(module)
    return (
        "\n\n"
        f"> **Source:** `{name}` **{version}** "
        f"(import: `{module_name}`)\n\n"
        "---\n\n"
    )


def remove_stale_api_exports(out_dir: Path, keep: Path) -> None:
    """Delete every ``api-*.md`` / ``api.md`` file in *out_dir* except *keep*."""
    for pattern in ("api-*.md", "api.md"):
        for old in out_dir.glob(pattern):
            if old != keep:
                old.unlink()


def safe_output_dir(docs_root: Path, module: str) -> Path | None:
    """Return the resolved output directory for *module*, or ``None`` if unsafe.

    A module is rejected when its name is not a plain identifier or when the
    resolved directory would fall outside *docs_root*.
    """
    if not is_safe_module_name(module):
        return None
    docs_resolved = docs_root.resolve()
    out_dir = (docs_root / module).resolve()
    try:
        out_dir.relative_to(docs_resolved)
    except ValueError:
        return None
    return out_dir


def import_modules_for_distribution(dist: im.Distribution) -> list[str]:
    """Return the importable top-level module names provided by *dist*.

    Uses ``top_level.txt`` when present (ignoring names starting with ``_``),
    then the ``PACKAGE_MODULE_FALLBACKS`` table, then a guess derived from the
    distribution name. Returns an empty list when nothing safe is found.
    """
    modules: list[str] = []
    try:
        text = dist.read_text("top_level.txt")
    except (FileNotFoundError, OSError, TypeError):
        text = None
    if text:
        modules = [
            line.strip()
            for line in text.splitlines()
            if line.strip() and not line.strip().startswith("_")
        ]
    if modules:
        return sorted({m for m in modules if is_safe_module_name(m)})

    key = normalize_dist_name(dist.metadata.get("Name", ""))
    if key in PACKAGE_MODULE_FALLBACKS:
        # Copy so callers cannot mutate the shared table.
        return list(PACKAGE_MODULE_FALLBACKS[key])
    guess = key.replace("-", "_")
    if is_safe_module_name(guess):
        print(
            f"warning: guessing import module {guess!r} for {key!r} "
            "(no top_level.txt); may be wrong",
            file=sys.stderr,
        )
        return [guess]
    return []


def export_module(
    pydoc_markdown: Path,
    site_pkgs: Path,
    module: str,
    out_dir: Path,
    *,
    package_name: str,
    package_version: str,
    keep_old_versions: bool = False,
) -> Path:
    """Render *module* with pydoc-markdown into ``out_dir/api-<version>.md``.

    Output is assembled in temporary files and moved into place with
    ``os.replace``, so a failed run leaves any existing export untouched.
    Unless *keep_old_versions* is set, other ``api-*.md`` / ``api.md`` files
    in *out_dir* are removed afterwards.

    Raises ``subprocess.CalledProcessError`` if pydoc-markdown fails,
    ``RuntimeError`` if it produces no output, and ``OSError`` on file errors.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    version_tag = safe_version_for_filename(package_version)
    api_md = out_dir / f"api-{version_tag}.md"
    header = export_doc_header(
        package_name=package_name,
        package_version=package_version,
        module=module,
    )
    body_tmp = out_dir / f".{module}.pydoc.tmp"
    export_tmp = out_dir / f".{module}.export.tmp"
    try:
        with body_tmp.open("w", encoding="utf-8") as handle:
            subprocess.run(
                [
                    str(pydoc_markdown),
                    "-p",
                    module,
                    "-I",
                    str(site_pkgs),
                    "-q",
                ],
                check=True,
                stdout=handle,
            )
        if body_tmp.stat().st_size == 0:
            raise RuntimeError(f"pydoc-markdown produced no output for {module!r}")
        with export_tmp.open("w", encoding="utf-8") as handle:
            handle.write(header)
            with body_tmp.open("r", encoding="utf-8") as body:
                shutil.copyfileobj(body, handle)
        os.replace(export_tmp, api_md)
        if not keep_old_versions:
            remove_stale_api_exports(out_dir, api_md)
    finally:
        body_tmp.unlink(missing_ok=True)
        export_tmp.unlink(missing_ok=True)
    return api_md


def write_docs_index(docs_root: Path, exports: list[tuple[str, str, Path]]) -> None:
    """Write ``README.md`` in *docs_root* with a table linking each export.

    *exports* holds ``(module, version, api_md_path)`` tuples.
    """
    lines = [
        "# API documentation (exported)",
        "",
        "Generated from `requirements.txt` via `py-export-api-docs.py`.",
        "",
        "| Module | Version | File |",
        "|--------|---------|------|",
    ]
    # Export paths are resolved, so resolve the root too (docs may be a symlink).
    root_resolved = docs_root.resolve()
    for module, version, api_md in sorted(exports):
        # Forward slashes keep the Markdown links valid on every platform.
        rel = api_md.resolve().relative_to(root_resolved).as_posix()
        safe_version = safe_markdown_inline(version)
        lines.append(f"| `{module}` | `{safe_version}` | [{rel}]({rel}) |")
    (docs_root / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


def distribution_for_requirement_in_venv(
    name: str,
    site_pkgs: Path,
) -> im.Distribution | None:
    """Return the distribution in *site_pkgs* whose normalized name equals *name*."""
    for dist in im.distributions(path=[str(site_pkgs)]):
        meta_name = dist.metadata.get("Name", "")
        if normalize_dist_name(meta_name) == name:
            return dist
    return None


def main() -> int:
    """Run the export and return the process exit status (0 on success)."""
    args = parse_args()
    requirements = args.requirements.resolve()
    if not requirements.is_file():
        print(f"error: requirements file not found: {requirements}", file=sys.stderr)
        return 1

    venv = args.venv_dir or default_venv_dir()
    docs_root = Path.cwd() / "docs"
    docs_root.mkdir(parents=True, exist_ok=True)

    try:
        py = ensure_venv(venv, requirements, args.recreate_venv)
    except subprocess.CalledProcessError as exc:
        print(f"error: failed to prepare virtualenv {venv}: {exc}", file=sys.stderr)
        return 1
    pydoc_markdown = venv_executable(venv, "pydoc-markdown")
    if not pydoc_markdown.is_file():
        print(f"error: pydoc-markdown not found in {venv}", file=sys.stderr)
        return 1

    try:
        site_pkgs = site_packages(py)
    except subprocess.CalledProcessError as exc:
        print(f"error: cannot locate site-packages in {venv}: {exc}", file=sys.stderr)
        return 1

    anchor_dir = requirements.parent.resolve()
    # dict.fromkeys de-duplicates while preserving first-seen order.
    req_names = list(
        dict.fromkeys(parse_requirements_file(requirements, anchor_dir=anchor_dir))
    )
    if not req_names:
        print(
            "error: no package names found in requirements file "
            "(only name-based specs and -r includes are parsed)",
            file=sys.stderr,
        )
        return 1

    exports: list[tuple[str, str, Path]] = []
    seen_modules: set[str] = set()
    failures = 0

    for req_name in req_names:
        dist = distribution_for_requirement_in_venv(req_name, site_pkgs)
        if dist is None:
            print(
                f"warning: package not installed in venv, skipping: {req_name}",
                file=sys.stderr,
            )
            failures += 1
            continue

        display_name = dist.metadata.get("Name", req_name)
        package_version = distribution_version(dist)
        modules = import_modules_for_distribution(dist)
        if not modules:
            print(
                f"warning: no safe import modules for {display_name}, skipping",
                file=sys.stderr,
            )
            failures += 1
            continue

        print(f"{display_name} -> {', '.join(modules)}", file=sys.stderr)
        for module in modules:
            if module in seen_modules:
                continue
            seen_modules.add(module)
            out_dir = safe_output_dir(docs_root, module)
            if out_dir is None:
                print(f"warning: unsafe module name, skipping: {module!r}", file=sys.stderr)
                failures += 1
                continue
            try:
                api_md = export_module(
                    pydoc_markdown,
                    site_pkgs,
                    module,
                    out_dir,
                    package_name=display_name,
                    package_version=package_version,
                    keep_old_versions=args.keep_old_versions,
                )
            except (subprocess.CalledProcessError, RuntimeError, OSError) as exc:
                print(f"warning: failed to export {module}: {exc}", file=sys.stderr)
                failures += 1
                continue
            size_kb = api_md.stat().st_size // 1024
            print(f"  wrote {api_md} ({size_kb} KiB)", file=sys.stderr)
            exports.append((module, package_version, api_md))

    if exports:
        write_docs_index(docs_root, exports)
        print(f"Done. Documentation in {docs_root.resolve()}", file=sys.stderr)
    if not exports:
        print("error: no documentation was exported", file=sys.stderr)
        return 1
    if args.strict and failures:
        print(f"error: {failures} package/module export failure(s) (--strict)", file=sys.stderr)
        return 1
    if failures:
        print(f"warning: {failures} package/module export failure(s)", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())