soteria/scripts/doc_hygiene_check.py

#!/usr/bin/env python3
"""Require docs for exported Go types/functions, with explicit legacy waivers."""

from __future__ import annotations

import argparse
import re
import sys
from pathlib import Path

# Exported (capitalised) type declaration, e.g. ``type Server struct {``.
# Group 1 captures the type name.
TYPE_RE = re.compile(r"^\s*type\s+([A-Z][A-Za-z0-9_]*)\b")
# Exported function or method declaration; group 1 captures the name.
# An optional receiver clause may precede the name. The name may be followed
# by a type-parameter list (``func Map[T any](...)``), allowing one level of
# nested brackets such as ``[S ~[]E, E any]``. The list must still be followed
# by "(" so a function literal returning a generic type
# (``func(x int) Result[int] {``) is not mistaken for a declaration.
FUNC_RE = re.compile(
    r"^\s*func\s*(?:\([^)]*\)\s*)?([A-Z][A-Za-z0-9_]*)\s*"
    r"(?:\[(?:[^\[\]]|\[[^\[\]]*\])*\]\s*)?\("
)


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the doc hygiene check.

    Returns:
        A namespace with ``root`` (defaults to ``"."``) and ``waivers``
        (required), both taken from ``sys.argv``.
    """
    cli = argparse.ArgumentParser()
    option_table = (
        ("--root", {"default": "."}),
        ("--waivers", {"required": True}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    namespace = cli.parse_args()
    return namespace


def iter_go_sources(root: Path) -> list[Path]:
    """Collect the Go source files that the check should inspect.

    Only the ``cmd`` and ``internal`` trees under *root* are scanned (a tree
    that does not exist is skipped). Files ending in ``_test.go`` and anything
    below ``internal/server/ui-dist/`` are left out.

    Args:
        root: Directory that contains the ``cmd`` / ``internal`` trees.

    Returns:
        Matching paths, ``cmd`` entries first, each tree in sorted order.
    """

    def is_checked(candidate: Path) -> bool:
        # Filtering is done on the root-relative POSIX path.
        rel = candidate.relative_to(root).as_posix()
        excluded = rel.endswith("_test.go") or rel.startswith(
            "internal/server/ui-dist/"
        )
        return not excluded

    collected: list[Path] = []
    for top_level in ("cmd", "internal"):
        subtree = root / top_level
        if subtree.exists():
            collected.extend(
                entry for entry in sorted(subtree.rglob("*.go")) if is_checked(entry)
            )
    return collected


def has_leading_comment(lines: list[str], idx: int) -> bool:
    """Report whether the declaration at ``lines[idx]`` has a doc comment.

    Go only treats a comment as documentation when it sits directly above the
    declaration. A comment separated from it by a blank line is detached (for
    example a section banner) and therefore does not count here.

    Args:
        lines: The file content split into lines.
        idx: Index of the declaration line within *lines*.

    Returns:
        True when the line immediately above is a ``//`` comment, or contains
        ``*/`` and a ``/*`` appears on that line or on any line above it;
        False otherwise.
    """
    above = idx - 1
    if above < 0:
        # Declaration is on the first line: nothing can precede it.
        return False
    text = lines[above].lstrip()
    if not text:
        # A blank line detaches any earlier comment from the declaration.
        return False
    if text.startswith("//"):
        return True
    if "*/" in text:
        # End of a block comment: confirm that an opening marker exists.
        return any("/*" in lines[pos] for pos in range(above, -1, -1))
    return False


def load_waivers(path: Path) -> set[tuple[str, str, str]]:
    """Read the waiver file into a set of ``(file, kind, name)`` triples.

    Each line is stripped of surrounding whitespace. Empty lines, lines that
    start with ``#`` and lines with fewer than three tab-separated fields are
    ignored; fields beyond the third are dropped.

    Args:
        path: Location of the waiver file.

    Returns:
        The parsed waivers, or an empty set when *path* does not exist.
    """
    if not path.exists():
        return set()

    stripped = (raw.strip() for raw in path.read_text(encoding="utf-8").splitlines())
    rows = (
        entry.split("\t")
        for entry in stripped
        if entry and not entry.startswith("#")
    )
    return {(cols[0], cols[1], cols[2]) for cols in rows if len(cols) >= 3}


def main() -> int:
    """Run the doc hygiene check and report the outcome on stdout.

    Every line of every selected Go file is matched against ``TYPE_RE`` and,
    failing that, ``FUNC_RE``. A matching declaration without a leading
    comment is recorded; it becomes a violation unless the waiver file lists
    it. Waivers that match no undocumented declaration are printed as stale.

    Returns:
        1 when at least one violation was found, otherwise 0.
    """
    options = parse_args()
    root = Path(options.root).resolve()
    waivers = load_waivers(Path(options.waivers).resolve())

    undocumented: set[tuple[str, str, str]] = set()
    violations: list[str] = []
    for source in iter_go_sources(root):
        rel = source.relative_to(root).as_posix()
        lines = source.read_text(encoding="utf-8", errors="ignore").splitlines()
        for idx, text in enumerate(lines):
            # Type declarations take precedence; fall back to functions.
            kind, match = "type", TYPE_RE.match(text)
            if match is None:
                kind, match = "func", FUNC_RE.match(text)
            if match is None or has_leading_comment(lines, idx):
                continue
            key = (rel, kind, match.group(1))
            # Record the key even when waived so the waiver is not reported
            # as stale below.
            undocumented.add(key)
            if key in waivers:
                continue
            violations.append(
                f"{rel}:{idx + 1}: missing doc comment for {kind} {key[2]}"
            )

    unused = sorted(waivers - undocumented)
    if unused:
        print("Doc hygiene warning: stale waivers present (safe to remove):")
        for waived_rel, waived_kind, waived_name in unused:
            print(f"  {waived_rel}\t{waived_kind}\t{waived_name}")

    if not violations:
        print("Doc hygiene checks: ok")
        return 0

    print("Doc hygiene check failed:")
    for message in violations:
        print(message)
    return 1


# Script entry point: the return value of main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
ci: add wave-1 quality and hygiene gate checks 2026-04-17 01:10:10 -03:00			`#!/usr/bin/env python3`
			`"""Require docs for exported Go types/functions, with explicit legacy waivers."""`

			`from __future__ import annotations`

			`import argparse`
			`import re`
			`import sys`
			`from pathlib import Path`

			`TYPE_RE = re.compile(r"^\s*type\s+([A-Z][A-Za-z0-9_]*)\b")`
			`FUNC_RE = re.compile(r"^\s*func\s*(?:\([^)]*\)\s*)?([A-Z][A-Za-z0-9_]*)\s*\(")`


			`def parse_args() -> argparse.Namespace:`
			`parser = argparse.ArgumentParser()`
			`parser.add_argument("--root", default=".")`
			`parser.add_argument("--waivers", required=True)`
			`return parser.parse_args()`


			`def iter_go_sources(root: Path) -> list[Path]:`
			`files: list[Path] = []`
			`for rel_root in ("cmd", "internal"):`
			`base = root / rel_root`
			`if not base.exists():`
			`continue`
			`for path in sorted(base.rglob("*.go")):`
			`rel = path.relative_to(root).as_posix()`
			`if rel.endswith("_test.go"):`
			`continue`
			`if rel.startswith("internal/server/ui-dist/"):`
			`continue`
			`files.append(path)`
			`return files`


			`def has_leading_comment(lines: list[str], idx: int) -> bool:`
			`cursor = idx - 1`
			`while cursor >= 0 and lines[cursor].strip() == "":`
			`cursor -= 1`
			`if cursor < 0:`
			`return False`
			`line = lines[cursor].lstrip()`
			`if line.startswith("//"):`
			`return True`
			`if "*/" in line:`
			`while cursor >= 0:`
			`if "/*" in lines[cursor]:`
			`return True`
			`cursor -= 1`
			`return False`


			`def load_waivers(path: Path) -> set[tuple[str, str, str]]:`
			`waivers: set[tuple[str, str, str]] = set()`
			`if not path.exists():`
			`return waivers`
			`for raw in path.read_text(encoding="utf-8").splitlines():`
			`line = raw.strip()`
			`if not line or line.startswith("#"):`
			`continue`
			`parts = line.split("\t")`
			`if len(parts) < 3:`
			`continue`
			`waivers.add((parts[0], parts[1], parts[2]))`
			`return waivers`


			`def main() -> int:`
			`args = parse_args()`
			`root = Path(args.root).resolve()`
			`waiver_path = Path(args.waivers).resolve()`
			`waivers = load_waivers(waiver_path)`

			`violations: list[str] = []`
			`seen: set[tuple[str, str, str]] = set()`
			`for path in iter_go_sources(root):`
			`rel = path.relative_to(root).as_posix()`
			`lines = path.read_text(encoding="utf-8", errors="ignore").splitlines()`
			`for idx, line in enumerate(lines):`
			`kind = ""`
			`name = ""`
			`type_match = TYPE_RE.match(line)`
			`if type_match:`
			`kind = "type"`
			`name = type_match.group(1)`
			`else:`
			`func_match = FUNC_RE.match(line)`
			`if func_match:`
			`kind = "func"`
			`name = func_match.group(1)`
			`if not kind:`
			`continue`
			`if has_leading_comment(lines, idx):`
			`continue`
			`key = (rel, kind, name)`
			`seen.add(key)`
			`if key not in waivers:`
			`violations.append(f"{rel}:{idx + 1}: missing doc comment for {kind} {name}")`

			`stale_waivers = sorted(waivers - seen)`
			`if stale_waivers:`
			`print("Doc hygiene warning: stale waivers present (safe to remove):")`
			`for rel, kind, name in stale_waivers:`
			`print(f"  {rel}\t{kind}\t{name}")`

			`if violations:`
			`print("Doc hygiene check failed:")`
			`for item in violations:`
			`print(item)`
			`return 1`

			`print("Doc hygiene checks: ok")`
			`return 0`


			`if __name__ == "__main__":`
			`sys.exit(main())`