soteria/scripts/doc_hygiene_check.py

120 lines
3.5 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""Require docs for exported Go types/functions, with explicit legacy waivers."""
from __future__ import annotations
import argparse
import re
import sys
from pathlib import Path
# Exported Go type declaration: optional indentation, the ``type`` keyword,
# then an identifier that starts with an uppercase letter. Group 1 is the name.
TYPE_RE = re.compile(r"^\s*type\s+([A-Z][A-Za-z0-9_]*)\b")
# Exported Go function or method declaration. The optional ``(...)`` group
# skips a method receiver. The name may be followed by ``(`` (ordinary
# parameter list) or ``[`` (type-parameter list of a generic function), so
# generic functions are checked as well. Group 1 is the name.
FUNC_RE = re.compile(r"^\s*func\s*(?:\([^)]*\)\s*)?([A-Z][A-Za-z0-9_]*)\s*[\[(]")
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the doc hygiene check.

    Returns:
        A namespace carrying ``root`` (defaults to ``"."``) and ``waivers``
        (mandatory; argparse exits with a usage error when it is missing).
    """
    cli = argparse.ArgumentParser()
    option_specs = (
        ("--root", {"default": "."}),
        ("--waivers", {"required": True}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def iter_go_sources(root: Path) -> list[Path]:
    """Collect the Go source files that the check should inspect.

    Only the ``cmd`` and ``internal`` trees under ``root`` are searched; a
    tree that does not exist is skipped. Test files (``*_test.go``) and
    anything below ``internal/server/ui-dist/`` are excluded.

    Args:
        root: Repository root that contains the ``cmd`` / ``internal`` trees.

    Returns:
        Matching file paths, sorted within each tree, ``cmd`` first.
    """
    files: list[Path] = []
    for rel_root in ("cmd", "internal"):
        base = root / rel_root
        if not base.is_dir():
            continue
        for path in sorted(base.rglob("*.go")):
            # rglob matches on the name only, so a directory called
            # "something.go" would be returned too and later break the
            # caller's read_text(); keep regular files only.
            if not path.is_file():
                continue
            rel = path.relative_to(root).as_posix()
            if rel.endswith("_test.go"):
                continue
            if rel.startswith("internal/server/ui-dist/"):
                continue
            files.append(path)
    return files
def has_leading_comment(lines: list[str], idx: int) -> bool:
    """Report whether the declaration at ``lines[idx]`` has a doc comment.

    Go only treats a comment as documentation when it sits directly above the
    declaration, so the line immediately before ``idx`` must be a comment; a
    blank line in between detaches the comment and counts as "no doc".

    Args:
        lines: The file content split into lines.
        idx: Zero-based index of the declaration line.

    Returns:
        True when the preceding line is a ``//`` comment or closes a
        ``/* ... */`` block whose opener is found at or above it.
    """
    cursor = idx - 1
    if cursor < 0:
        return False
    line = lines[cursor].strip()
    if not line:
        # A blank line separates any earlier comment from the declaration.
        return False
    if line.startswith("//"):
        return True
    if "*/" in line:
        # Walk upwards (including the closing line itself, for one-line
        # block comments) until the opening marker is found.
        return any("/*" in lines[i] for i in range(cursor, -1, -1))
    return False
def load_waivers(path: Path) -> set[tuple[str, str, str]]:
    """Read the waiver file into a set of ``(path, kind, name)`` triples.

    The file is UTF-8 text with one tab-separated entry per line. Blank lines
    and lines starting with ``#`` are ignored, as are lines with fewer than
    three fields; fields beyond the third are dropped.

    Args:
        path: Location of the waiver file.

    Returns:
        The parsed triples, or an empty set when ``path`` does not exist.
    """
    if not path.exists():
        return set()
    entries: set[tuple[str, str, str]] = set()
    content = path.read_text(encoding="utf-8")
    for stripped in map(str.strip, content.splitlines()):
        if stripped == "" or stripped[0] == "#":
            continue
        fields = stripped.split("\t")
        if len(fields) >= 3:
            rel, kind, name = fields[:3]
            entries.add((rel, kind, name))
    return entries
def main() -> int:
    """Run the doc hygiene check and print the outcome to stdout.

    Every file returned by ``iter_go_sources`` is scanned line by line for
    exported ``type`` / ``func`` declarations. A declaration without a leading
    comment is recorded as undocumented; it becomes a violation unless its
    ``(relative path, kind, name)`` triple is listed in the waiver file.
    Waivers that match no undocumented declaration are reported as stale,
    which is a warning only.

    Returns:
        1 when at least one unwaived violation was found, otherwise 0.
    """
    args = parse_args()
    root = Path(args.root).resolve()
    waivers = load_waivers(Path(args.waivers).resolve())
    # Order matters: a line is tested as a type declaration first.
    matchers = (("type", TYPE_RE), ("func", FUNC_RE))

    undocumented: set[tuple[str, str, str]] = set()
    violations: list[str] = []
    for source in iter_go_sources(root):
        rel = source.relative_to(root).as_posix()
        # Undecodable bytes are dropped instead of aborting the scan.
        lines = source.read_text(encoding="utf-8", errors="ignore").splitlines()
        for line_no, text in enumerate(lines, start=1):
            for kind, pattern in matchers:
                found = pattern.match(text)
                if found:
                    break
            else:
                # Neither pattern matched: not an exported declaration.
                continue
            if has_leading_comment(lines, line_no - 1):
                continue
            name = found.group(1)
            key = (rel, kind, name)
            undocumented.add(key)
            if key in waivers:
                continue
            violations.append(f"{rel}:{line_no}: missing doc comment for {kind} {name}")

    stale_waivers = sorted(waivers - undocumented)
    if stale_waivers:
        print("Doc hygiene warning: stale waivers present (safe to remove):")
        for stale_rel, stale_kind, stale_name in stale_waivers:
            print(f" {stale_rel}\t{stale_kind}\t{stale_name}")

    if not violations:
        print("Doc hygiene checks: ok")
        return 0
    print("Doc hygiene check failed:")
    for item in violations:
        print(item)
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    sys.exit(exit_code)