Files
py-export-api-docs/py-export-api-docs.py
grayhook 0230587040 Add installed package version to exported API docs.
Write api-<version>.md with version metadata in each file and index, sanitize
output for safe paths/markdown, atomically replace exports, and add
--keep-old-versions to retain prior exports as an archive.
2026-05-30 18:14:33 +07:00

563 lines
17 KiB
Python
Executable File

#!/usr/bin/env python3
"""Export Markdown API docs for packages listed in requirements.txt.
Creates docs/<import-module>/api-<version>.md under the current working directory using a
temporary virtualenv in /tmp (does not install into the system Python).
Supported requirements lines: PEP 508 name specs and ``-r`` includes (must stay
under the directory of the top-level requirements file). Lines like ``-e``,
direct URLs, and other pip options are installed by pip but not exported.
Example:
    ./py-export-api-docs.py requirements.txt
    ./py-export-api-docs.py /path/to/requirements.txt --recreate-venv
    ./py-export-api-docs.py requirements.txt --strict
"""
from __future__ import annotations
import argparse
import hashlib
import importlib.metadata as im
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path
# Pin for reproducible exports (pydoc-markdown 4.x API).
PYDOC_MARKDOWN_SPEC = "pydoc-markdown>=4.0,<5"
# PyPI distribution name (normalized) -> import roots when metadata is missing.
# Keys are looked up with the output of normalize_dist_name(), so they must be
# lowercase and use "-" as the only separator.
PACKAGE_MODULE_FALLBACKS: dict[str, list[str]] = {
    "python-gitlab": ["gitlab"],
    "python-redmine": ["redminelib"],
    "pyyaml": ["yaml"],
    "pillow": ["PIL"],
}
# A single plain identifier (no dots, slashes or other path characters). Module
# names are checked against this before they are used as directory names.
MODULE_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
# Stamp file stored inside the virtualenv; holds the digest of the requirements
# tree that was last installed into it.
REQUIREMENTS_HASH_FILE = ".requirements.sha256"
def parse_args() -> argparse.Namespace:
    """Parse the command line (``sys.argv``) and return the resulting namespace.

    The namespace carries ``requirements`` (Path), ``venv_dir`` (Path or None)
    and the boolean switches ``recreate_venv``, ``strict`` and
    ``keep_old_versions``.
    """
    cli = argparse.ArgumentParser(
        description="Export Markdown API docs for requirements.txt packages.",
    )
    cli.add_argument("requirements", type=Path, help="Path to requirements.txt")
    cli.add_argument(
        "--venv-dir",
        type=Path,
        default=None,
        help="Virtualenv location (default: /tmp/export-api-docs-<uid>-<hash>)",
    )
    # All remaining options are plain on/off switches; register them in order.
    switches = (
        ("--recreate-venv", "Remove and recreate the temporary virtualenv"),
        ("--strict", "Exit with failure if any package or module export fails"),
        (
            "--keep-old-versions",
            "Keep previous api-<version>.md and legacy api.md files in each module directory",
        ),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)
    return cli.parse_args()
def normalize_dist_name(name: str) -> str:
    """Return *name* lowercased with every run of ``-``, ``_`` or ``.`` collapsed to ``-``."""
    segments = re.split(r"[-_.]+", name)
    return "-".join(segments).lower()
def parse_requirements_file(
path: Path,
seen: set[Path] | None = None,
anchor_dir: Path | None = None,
) -> list[str]:
"""Return normalized PyPI distribution names from a requirements file."""
path = path.resolve()
if seen is None:
seen = set()
if anchor_dir is None:
anchor_dir = path.parent.resolve()
if path in seen:
return []
seen.add(path)
names: list[str] = []
for raw in path.read_text(encoding="utf-8").splitlines():
line = raw.split("#", 1)[0].strip()
if not line:
continue
if line.startswith("-r"):
parts = line.split(maxsplit=1)
if len(parts) != 2:
continue
include = (path.parent / parts[1]).resolve()
try:
include.relative_to(anchor_dir)
except ValueError:
print(
f"warning: skipping -r outside requirements tree: {include}",
file=sys.stderr,
)
continue
if not include.is_file():
print(f"warning: requirements include not found: {include}", file=sys.stderr)
continue
names.extend(parse_requirements_file(include, seen, anchor_dir))
continue
if line.startswith("-"):
continue
line = re.split(r"[;@]", line, maxsplit=1)[0].strip()
line = re.sub(r"\[.*\]", "", line).strip()
match = re.match(r"^([A-Za-z0-9][A-Za-z0-9._-]*)", line)
if match:
names.append(normalize_dist_name(match.group(1)))
return names
def default_venv_dir() -> Path:
    """Return the default virtualenv location for the current working directory.

    On platforms that provide ``os.getuid`` the result is
    ``/tmp/export-api-docs-<uid>-<hash>``, where ``<hash>`` is the first 12 hex
    digits of the SHA-256 of the resolved working directory. Where ``os.getuid``
    does not exist (it is Unix-only), the directory is placed under
    ``tempfile.gettempdir()`` and tagged with the ``USERNAME`` environment
    variable instead of a numeric uid.
    """
    digest = hashlib.sha256(str(Path.cwd().resolve()).encode()).hexdigest()[:12]
    getuid = getattr(os, "getuid", None)
    if getuid is not None:
        return Path("/tmp") / f"export-api-docs-{getuid()}-{digest}"
    # No getuid(): this would previously raise AttributeError, even though the
    # rest of the script has explicit "nt" handling.
    import tempfile

    owner = re.sub(r"[^\w.-]", "_", os.environ.get("USERNAME", "")) or "user"
    return Path(tempfile.gettempdir()) / f"export-api-docs-{owner}-{digest}"
def venv_python(venv: Path) -> Path:
    """Return the path of the Python interpreter inside the virtualenv *venv*."""
    if os.name == "nt":
        folder, binary = "Scripts", "python.exe"
    else:
        folder, binary = "bin", "python"
    return venv / folder / binary
def venv_executable(venv: Path, name: str) -> Path:
    """Return the path of the console script *name* inside the virtualenv *venv*."""
    on_windows = os.name == "nt"
    # Windows virtualenvs keep scripts in "Scripts" with an ".exe" suffix.
    return venv / "Scripts" / f"{name}.exe" if on_windows else venv / "bin" / name
def run(cmd: list[str], **kwargs) -> None:
    """Run *cmd* and raise ``subprocess.CalledProcessError`` on a non-zero exit.

    Extra keyword arguments are forwarded to ``subprocess.run``.
    """
    subprocess.run(args=cmd, check=True, **kwargs)
def check_venv_module() -> None:
    """Exit with status 1 if ``python -m venv`` cannot be run with this interpreter.

    The probe runs ``<sys.executable> -m venv --help`` with its output captured.
    On failure an error is written to stderr before exiting.
    """
    probe = [sys.executable, "-m", "venv", "--help"]
    try:
        subprocess.run(probe, check=True, capture_output=True)
    except (subprocess.CalledProcessError, FileNotFoundError) as exc:
        message = f"error: python venv module unavailable (install python3-venv): {exc}"
        print(message, file=sys.stderr)
        sys.exit(1)
def collect_requirements_files(
path: Path,
seen: set[Path] | None = None,
anchor_dir: Path | None = None,
) -> list[Path]:
"""Return all requirements files reachable via -r from path (in stable order)."""
path = path.resolve()
if seen is None:
seen = set()
if anchor_dir is None:
anchor_dir = path.parent.resolve()
if path in seen:
return []
seen.add(path)
files = [path]
for raw in path.read_text(encoding="utf-8").splitlines():
line = raw.split("#", 1)[0].strip()
if not line.startswith("-r"):
continue
parts = line.split(maxsplit=1)
if len(parts) != 2:
continue
include = (path.parent / parts[1]).resolve()
try:
include.relative_to(anchor_dir)
except ValueError:
continue
if include.is_file():
files.extend(collect_requirements_files(include, seen, anchor_dir))
return files
def requirements_tree_digest(root: Path) -> str:
    """Return a SHA-256 hex digest over *root* and every file it includes via ``-r``.

    Files are processed in sorted path order; each contributes its path string
    followed by its raw bytes.
    """
    hasher = hashlib.sha256()
    members = collect_requirements_files(root, anchor_dir=root.parent.resolve())
    members.sort()
    for member in members:
        hasher.update(str(member).encode() + member.read_bytes())
    return hasher.hexdigest()
def venv_needs_install(venv: Path, requirements: Path, recreate: bool) -> bool:
    """Return True when packages must be (re)installed into *venv*.

    That is the case when *recreate* is set, when *venv* is not a directory,
    when the stamp file is missing, or when the stamped digest differs from the
    current digest of the requirements tree.
    """
    if recreate:
        return True
    if not venv.is_dir():
        return True
    marker = venv / REQUIREMENTS_HASH_FILE
    if marker.is_file():
        recorded = marker.read_text(encoding="utf-8").strip()
        return recorded != requirements_tree_digest(requirements)
    return True
def ensure_venv(venv: Path, requirements: Path, recreate: bool) -> Path:
    """Create or refresh the virtualenv and return the path of its interpreter.

    Steps, in order: verify that the ``venv`` module works, remove the
    virtualenv when *recreate* is set, create it if missing, then install
    pydoc-markdown plus the requirements whenever the stored digest is missing
    or stale. If the packages are current but the ``pydoc-markdown`` script is
    absent, only pydoc-markdown is installed.

    Raises:
        subprocess.CalledProcessError: If creating the virtualenv or a pip
            invocation fails.
    """
    check_venv_module()
    interpreter = venv_python(venv)
    pip_install = [str(interpreter), "-m", "pip", "install", "-q"]
    if recreate and venv.exists():
        shutil.rmtree(venv)
    if not venv.exists():
        print(f"Creating virtualenv: {venv}", file=sys.stderr)
        run([sys.executable, "-m", "venv", str(venv)])
    if not venv_needs_install(venv, requirements, recreate=False):
        # Packages are current; only make sure the exporter itself is present.
        if not venv_executable(venv, "pydoc-markdown").is_file():
            print("Installing pydoc-markdown into virtualenv…", file=sys.stderr)
            run([*pip_install, PYDOC_MARKDOWN_SPEC])
        return interpreter
    print("Installing packages into temporary virtualenv…", file=sys.stderr)
    run([*pip_install, "-U", "pip", "setuptools", "wheel"])
    run([*pip_install, PYDOC_MARKDOWN_SPEC, "-r", str(requirements.resolve())])
    # Record the digest only after a successful install, so that a failed run
    # is retried next time.
    stamp = venv / REQUIREMENTS_HASH_FILE
    stamp.write_text(requirements_tree_digest(requirements), encoding="utf-8")
    return interpreter
def site_packages(py: Path) -> Path:
    """Return a site-packages directory of the interpreter *py*.

    A small script is run with ``py -c`` and its single output line is used.
    The script takes ``site.getsitepackages()``, falls back to ``sys.path``
    entries ending in ``site-packages``, then to
    ``<prefix>/lib/pythonX.Y/site-packages``. Among the candidates it prefers
    the first one actually named ``site-packages`` or ``dist-packages``.

    Raises:
        subprocess.CalledProcessError: If the interpreter exits non-zero, for
            example because no candidate directory was found.
    """
    # The script is passed to ``python -c`` verbatim, so it must start at
    # column 0 and keep proper block indentation.
    code = """
import site
import sys
from pathlib import Path
paths = site.getsitepackages()
if not paths:
    for entry in sys.path:
        if entry.endswith("site-packages"):
            paths.append(entry)
if not paths:
    ver = f"{sys.version_info.major}.{sys.version_info.minor}"
    guess = Path(sys.prefix) / "lib" / f"python{ver}" / "site-packages"
    if guess.is_dir():
        paths.append(str(guess))
if not paths:
    raise SystemExit("no site-packages directory found")
# On Windows getsitepackages() lists the prefix directory before
# Lib\\site-packages, so prefer an entry that is a packages directory.
preferred = [p for p in paths if Path(p).name in ("site-packages", "dist-packages")]
print((preferred or paths)[0])
"""
    out = subprocess.check_output([str(py), "-c", code], text=True).strip()
    return Path(out)
def is_safe_module_name(module: str) -> bool:
    """Return True when *module* is a single plain identifier accepted by ``MODULE_RE``."""
    return bool(MODULE_RE.fullmatch(module))
def distribution_version(dist: im.Distribution) -> str:
    """Return the version of *dist*, or ``"unknown"`` when it is missing or empty."""
    try:
        reported = dist.version
    except (im.PackageNotFoundError, AttributeError):
        reported = None
    return reported or "unknown"
def safe_metadata_value(value: str) -> str:
    """Return *value* made safe for embedding in an HTML comment.

    ASCII control characters are removed, surrounding whitespace is trimmed and
    ``-->`` is broken up so that it cannot close the comment. An input that is
    empty after cleaning yields ``"unknown"``.
    """
    # Strip control characters before trimming and before the emptiness check:
    # otherwise a value made only of control characters comes back as "" and
    # whitespace exposed by the removal is left in place.
    cleaned = re.sub(r"[\x00-\x1f\x7f]", "", value).strip()
    if not cleaned:
        return "unknown"
    # Done last so that a "-->" formed by removing characters is caught too.
    return cleaned.replace("-->", "-- >")
def safe_markdown_inline(value: str) -> str:
    """Return *value* cleaned by ``safe_metadata_value`` with ``\\``, ````` and ``|`` backslash-escaped."""
    # One pass over the text: each special character maps to its escaped form.
    escapes = str.maketrans({"\\": "\\\\", "`": "\\`", "|": "\\|"})
    return safe_metadata_value(value).translate(escapes)
def safe_version_for_filename(version: str) -> str:
    """Return *version* reduced to characters that are safe in a file name.

    After ``safe_metadata_value`` cleaning, every character other than a word
    character, ``.``, ``+`` or ``-`` becomes ``_``. An empty result is replaced
    by ``"unknown"``.
    """
    unsafe_chars = re.compile(r"[^\w.+-]")
    tag = unsafe_chars.sub("_", safe_metadata_value(version))
    if not tag:
        return "unknown"
    return tag
def export_doc_header(*, package_name: str, package_version: str, module: str) -> str:
    """Build the header that is prepended to an exported API document.

    The header is an HTML comment with machine-readable metadata, a block quote
    naming the source package, version and import module, and a horizontal
    rule; each part is followed by a blank line.
    """
    shown_name = safe_markdown_inline(package_name)
    shown_version = safe_markdown_inline(package_version)
    shown_module = safe_markdown_inline(module)
    metadata_comment = (
        f"<!-- py-export-api-docs: package={safe_metadata_value(package_name)} "
        f"version={safe_metadata_value(package_version)} "
        f"module={safe_metadata_value(module)} -->"
    )
    source_line = (
        f"> **Source:** `{shown_name}` **{shown_version}** "
        f"(import: `{shown_module}`)"
    )
    # The trailing empty item makes the header end with a blank line.
    return "\n\n".join((metadata_comment, source_line, "---", ""))
def remove_stale_api_exports(out_dir: Path, keep: Path) -> None:
    """Delete every ``api-*.md`` and ``api.md`` in *out_dir* except *keep*."""
    stale = [
        candidate
        for glob_pattern in ("api-*.md", "api.md")
        for candidate in out_dir.glob(glob_pattern)
        if candidate != keep
    ]
    for candidate in stale:
        candidate.unlink()
def safe_output_dir(docs_root: Path, module: str) -> Path | None:
    """Return the resolved output directory for *module*, or None if it is unsafe.

    A module is rejected when its name is not a plain identifier or when the
    resolved directory does not lie under the resolved *docs_root*.
    """
    if is_safe_module_name(module):
        root = docs_root.resolve()
        candidate = (docs_root / module).resolve()
        try:
            candidate.relative_to(root)
        except ValueError:
            return None
        return candidate
    return None
def import_modules_for_distribution(dist: im.Distribution) -> list[str]:
    """Return the top-level import names to document for *dist*.

    Sources are tried in this order:

    1. ``top_level.txt``: non-empty entries not starting with ``_``. If any
       exist, the sorted, de-duplicated safe names among them are returned.
    2. ``PACKAGE_MODULE_FALLBACKS``, keyed by the normalized distribution name.
    3. A guess that replaces ``-`` with ``_`` in the normalized name; a warning
       is printed to stderr.

    An empty list means that no usable name was found.
    """
    try:
        top_level = dist.read_text("top_level.txt")
    except (FileNotFoundError, OSError, TypeError):
        top_level = None
    declared: list[str] = []
    if top_level:
        for entry in top_level.splitlines():
            candidate = entry.strip()
            if candidate and not candidate.startswith("_"):
                declared.append(candidate)
    if declared:
        return sorted({name for name in declared if is_safe_module_name(name)})
    dist_key = normalize_dist_name(dist.metadata.get("Name", ""))
    known = PACKAGE_MODULE_FALLBACKS.get(dist_key)
    if known is not None:
        return known
    guess = dist_key.replace("-", "_")
    if not is_safe_module_name(guess):
        return []
    print(
        f"warning: guessing import module {guess!r} for {dist_key!r} "
        "(no top_level.txt); may be wrong",
        file=sys.stderr,
    )
    return [guess]
def export_module(
    pydoc_markdown: Path,
    site_pkgs: Path,
    module: str,
    out_dir: Path,
    *,
    package_name: str,
    package_version: str,
    keep_old_versions: bool = False,
) -> Path:
    """Export the API docs of *module* to ``out_dir/api-<version>.md``.

    pydoc-markdown output is captured in a temporary file, combined with the
    metadata header in a second temporary file, and moved over the final name
    with ``os.replace``. Unless *keep_old_versions* is set, other ``api-*.md``
    and ``api.md`` files in *out_dir* are then deleted. Both temporary files
    are removed whether or not the export succeeds.

    Returns:
        Path of the written Markdown file.

    Raises:
        subprocess.CalledProcessError: If pydoc-markdown exits non-zero.
        RuntimeError: If pydoc-markdown produced no output.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    target = out_dir / f"api-{safe_version_for_filename(package_version)}.md"
    banner = export_doc_header(
        package_name=package_name,
        package_version=package_version,
        module=module,
    )
    raw_path = out_dir / f".{module}.pydoc.tmp"
    staged_path = out_dir / f".{module}.export.tmp"
    command = [str(pydoc_markdown), "-p", module, "-I", str(site_pkgs), "-q"]
    try:
        with raw_path.open("w", encoding="utf-8") as sink:
            subprocess.run(command, check=True, stdout=sink)
        if not raw_path.stat().st_size:
            raise RuntimeError(f"pydoc-markdown produced no output for {module!r}")
        with staged_path.open("w", encoding="utf-8") as sink:
            sink.write(banner)
            with raw_path.open("r", encoding="utf-8") as source:
                shutil.copyfileobj(source, sink)
        # Publish in one step so that readers never see a partial file.
        os.replace(staged_path, target)
        if not keep_old_versions:
            remove_stale_api_exports(out_dir, target)
    finally:
        for leftover in (raw_path, staged_path):
            leftover.unlink(missing_ok=True)
    return target
def write_docs_index(docs_root: Path, exports: list[tuple[str, str, Path]]) -> None:
    """Write ``README.md`` in *docs_root* with a table of the exported documents.

    Args:
        docs_root: Directory that receives ``README.md``.
        exports: ``(module, version, path)`` tuples. Every path must lie under
            *docs_root*, otherwise ``Path.relative_to`` raises ``ValueError``.
            Rows are written in sorted order.
    """
    lines = [
        "# API documentation (exported)",
        "",
        "Generated from `requirements.txt` via `py-export-api-docs.py`.",
        "",
        "| Module | Version | File |",
        "|--------|---------|------|",
    ]
    for module, version, api_md in sorted(exports):
        # Markdown link targets need forward slashes; str(Path) would give
        # backslashes on Windows and break the link.
        rel = api_md.relative_to(docs_root).as_posix()
        safe_version = safe_markdown_inline(version)
        lines.append(f"| `{module}` | `{safe_version}` | [{rel}]({rel}) |")
    (docs_root / "README.md").write_text("\n".join(lines) + "\n", encoding="utf-8")
def distribution_for_requirement_in_venv(
name: str,
site_pkgs: Path,
) -> im.Distribution | None:
for dist in im.distributions(path=[str(site_pkgs)]):
meta_name = dist.metadata.get("Name", "")
if normalize_dist_name(meta_name) == name:
return dist
return None
def main() -> int:
    """Run the export and return the process exit status.

    The documentation is written to ``docs/`` under the current working
    directory: one sub-directory per import module, plus a ``README.md`` index.

    Returns:
        0 on success. 1 when the requirements file is missing, the virtualenv
        cannot be prepared, no package names are found, nothing is exported, or
        ``--strict`` is given and at least one package or module failed.
    """
    args = parse_args()
    requirements = args.requirements.resolve()
    if not requirements.is_file():
        print(f"error: requirements file not found: {requirements}", file=sys.stderr)
        return 1
    venv = args.venv_dir or default_venv_dir()
    docs_root = Path.cwd() / "docs"
    docs_root.mkdir(parents=True, exist_ok=True)
    # A failed venv creation or pip install is an expected failure; report it
    # as an error message rather than letting a traceback escape.
    try:
        py = ensure_venv(venv, requirements, args.recreate_venv)
    except (subprocess.CalledProcessError, OSError) as exc:
        print(f"error: could not prepare virtualenv {venv}: {exc}", file=sys.stderr)
        return 1
    pydoc_markdown = venv_executable(venv, "pydoc-markdown")
    if not pydoc_markdown.is_file():
        print(f"error: pydoc-markdown not found in {venv}", file=sys.stderr)
        return 1
    try:
        site_pkgs = site_packages(py)
    except (subprocess.CalledProcessError, OSError) as exc:
        print(f"error: could not locate site-packages in {venv}: {exc}", file=sys.stderr)
        return 1
    anchor_dir = requirements.parent.resolve()
    # dict.fromkeys() removes duplicates while keeping the file order.
    req_names = list(dict.fromkeys(parse_requirements_file(requirements, anchor_dir=anchor_dir)))
    if not req_names:
        print(
            "error: no package names found in requirements file "
            "(only name-based specs and -r includes are parsed)",
            file=sys.stderr,
        )
        return 1
    exports: list[tuple[str, str, Path]] = []
    seen_modules: set[str] = set()
    failures = 0
    for req_name in req_names:
        dist = distribution_for_requirement_in_venv(req_name, site_pkgs)
        if dist is None:
            print(f"warning: package not installed in venv, skipping: {req_name}", file=sys.stderr)
            failures += 1
            continue
        display_name = dist.metadata.get("Name", req_name)
        package_version = distribution_version(dist)
        modules = import_modules_for_distribution(dist)
        if not modules:
            print(
                f"warning: no safe import modules for {display_name}, skipping",
                file=sys.stderr,
            )
            failures += 1
            continue
        print(f"{display_name} -> {', '.join(modules)}", file=sys.stderr)
        for module in modules:
            # A module claimed by several distributions is exported only once.
            if module in seen_modules:
                continue
            seen_modules.add(module)
            out_dir = safe_output_dir(docs_root, module)
            if out_dir is None:
                print(f"warning: unsafe module name, skipping: {module!r}", file=sys.stderr)
                failures += 1
                continue
            try:
                api_md = export_module(
                    pydoc_markdown,
                    site_pkgs,
                    module,
                    out_dir,
                    package_name=display_name,
                    package_version=package_version,
                    keep_old_versions=args.keep_old_versions,
                )
            except (subprocess.CalledProcessError, RuntimeError, OSError) as exc:
                print(f"warning: failed to export {module}: {exc}", file=sys.stderr)
                failures += 1
                continue
            size_kb = api_md.stat().st_size // 1024
            print(f"  wrote {api_md} ({size_kb} KiB)", file=sys.stderr)
            exports.append((module, package_version, api_md))
    if not exports:
        # Do not announce "Done." when nothing was produced.
        print("error: no documentation was exported", file=sys.stderr)
        return 1
    write_docs_index(docs_root, exports)
    print(f"Done. Documentation in {docs_root.resolve()}", file=sys.stderr)
    if args.strict and failures:
        print(f"error: {failures} package/module export failure(s) (--strict)", file=sys.stderr)
        return 1
    if failures:
        print(f"warning: {failures} package/module export failure(s)", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())