2026-04-10 15:56:18 -03:00
|
|
|
#!/usr/bin/env bash
# Hygiene gate: run clippy over the workspace, then compare per-file
# LOC / warning / doc-debt metrics against a committed baseline via the
# embedded Python analyzer below. Exits non-zero on any regression.
set -euo pipefail

# Repo root is two directories up from this script's location.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
REPORT_DIR="${ROOT_DIR}/target/hygiene-gate"
CLIPPY_JSON="${REPORT_DIR}/clippy.json"
SUMMARY_TXT="${REPORT_DIR}/summary.txt"
# Committed baseline the current metrics are ratcheted against.
BASELINE_JSON="${ROOT_DIR}/scripts/ci/hygiene_gate_baseline.json"

mkdir -p "${REPORT_DIR}"

# Capture clippy's machine-readable diagnostics; pedantic lints are
# surfaced as warnings so the analyzer can count them per file.
cargo clippy --workspace --all-targets --message-format json -- -W clippy::pedantic >"${CLIPPY_JSON}"

# Hand the report/baseline paths and repo root to the analyzer as argv.
python3 - "${CLIPPY_JSON}" "${BASELINE_JSON}" "${SUMMARY_TXT}" "${ROOT_DIR}" <<'PY'
|
|
|
|
|
import json
|
|
|
|
|
import pathlib
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
2026-04-10 17:00:33 -03:00
|
|
|
from collections import defaultdict
|
2026-04-10 15:56:18 -03:00
|
|
|
|
|
|
|
|
# Positional arguments handed over by the wrapping shell script:
# clippy JSON output, baseline JSON, summary output path, repo root.
clippy_path = pathlib.Path(sys.argv[1])
baseline_path = pathlib.Path(sys.argv[2])
summary_path = pathlib.Path(sys.argv[3])
root = pathlib.Path(sys.argv[4])

# First line of a Rust `fn` item, allowing optional visibility
# (`pub`, `pub(crate)`, ...), `async`, and `unsafe` qualifiers.
fn_re = re.compile(r'^\s*(?:pub(?:\([^)]+\))?\s+)?(?:async\s+)?(?:unsafe\s+)?fn\s+\w+')
|
|
|
|
|
|
|
|
|
|
def load_json_lines(path: pathlib.Path):
    """Yield parsed objects from a JSON-lines file.

    Blank lines and lines that fail to parse as JSON are skipped
    silently (cargo interleaves non-JSON output on occasion).
    """
    for line in path.read_text(encoding='utf-8').splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        yield parsed
|
|
|
|
|
|
|
|
|
|
def repo_relative(path: str) -> str | None:
|
|
|
|
|
try:
|
|
|
|
|
return pathlib.Path(path).resolve().relative_to(root).as_posix()
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def clippy_counts(path: pathlib.Path) -> dict[str, int]:
    """Tally clippy warnings per repo-relative source file.

    Only `compiler-message` records at warning level are counted, and
    only for files under a `src/` directory that are neither build
    artifacts (`target/`) nor package-local tests (`src/tests/`).
    """
    tally: dict[str, int] = defaultdict(int)
    for record in load_json_lines(path):
        if record.get('reason') != 'compiler-message':
            continue
        msg = record.get('message', {})
        if msg.get('level') != 'warning':
            continue
        span_list = msg.get('spans') or []
        # Prefer the primary span; fall back to the first span if none
        # is flagged primary.
        chosen = None
        for span in span_list:
            if span.get('is_primary'):
                chosen = span
                break
        if chosen is None and span_list:
            chosen = span_list[0]
        if not chosen:
            continue
        rel = repo_relative(chosen.get('file_name', ''))
        if rel is None:
            continue
        if '/src/' not in rel or '/target/' in rel or '/src/tests/' in rel:
            continue
        tally[rel] += 1
    return dict(sorted(tally.items()))
|
|
|
|
|
|
|
|
|
|
def function_blocks(lines: list[str]):
    """Yield (start_line, end_line, doc_ok, non_trivial) for each Rust fn.

    Line numbers in the yielded tuples are 1-based. ``doc_ok`` is True
    when the nearest non-blank line above the `fn` is a doc comment
    (`///` or `#[doc =`). ``non_trivial`` is a heuristic: a body of 12+
    non-blank lines, or any control-flow-looking token in the block text.

    NOTE(review): this is textual brace counting, not parsing — braces
    inside string literals or comments are counted too, and a bodiless
    `fn` declaration (e.g. a trait method signature ending in `;`) never
    opens a brace, so the inner scan would run to end-of-input. Presumed
    acceptable for the repository's actual sources — confirm.
    """
    index = 0
    while index < len(lines):
        if not fn_re.match(lines[index]):
            index += 1
            continue

        start = index
        doc_ok = False
        prev = index - 1
        # Walk back over blank lines to find the closest preceding content.
        while prev >= 0 and not lines[prev].strip():
            prev -= 1
        if prev >= 0:
            stripped = lines[prev].lstrip()
            doc_ok = stripped.startswith('///') or stripped.startswith('#[doc =')

        # Scan forward, tracking net brace depth; the block ends on the
        # line where depth returns to zero after the first '{' was seen.
        brace_depth = 0
        seen_open = False
        body_lines = 0
        j = index
        while j < len(lines):
            text = lines[j]
            brace_depth += text.count('{') - text.count('}')
            if '{' in text:
                seen_open = True
            if seen_open and text.strip():
                body_lines += 1
            if seen_open and brace_depth <= 0:
                break
            j += 1

        block_text = '\n'.join(lines[start:j + 1])
        # '?.' approximates use of the `?` operator in a method chain.
        non_trivial = body_lines >= 12 or any(token in block_text for token in (' if ', ' match ', ' for ', ' while ', ' loop ', '?.'))
        yield start + 1, j + 1, doc_ok, non_trivial
        index = j + 1
|
|
|
|
|
|
|
|
|
|
def doc_debt_counts(path: pathlib.Path) -> dict[str, int]:
    """Count non-trivial undocumented functions per tracked source file.

    ``path`` is the directory tree to scan. Fix: the original accepted
    ``path`` but ignored it and always scanned the module-level ``root``;
    this version honors the parameter. Behavior is unchanged for the
    existing call site, which passes ``root``.

    Filtering matches clippy_counts: only files under `src/`, excluding
    `target/` artifacts and package-local `src/tests/`.
    """
    counts: dict[str, int] = defaultdict(int)
    for file in sorted(path.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or '/src/' not in rel or '/target/' in rel:
            continue
        if '/src/tests/' in rel:
            continue
        lines = file.read_text(encoding='utf-8').splitlines()
        debt = 0
        for _, _, doc_ok, non_trivial in function_blocks(lines):
            if non_trivial and not doc_ok:
                debt += 1
        # Record even a zero so every tracked file appears in the baseline.
        counts[rel] = debt
    return dict(sorted(counts.items()))
|
|
|
|
|
|
|
|
|
|
def source_loc_counts() -> dict[str, int]:
    """Count physical lines per tracked `.rs` file under the repo root.

    Uses the same filtering as the other metric collectors: only files
    under `src/`, excluding `target/` and package-local `src/tests/`.
    """
    counts: dict[str, int] = {}
    for file in sorted(root.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or '/src/' not in rel or '/target/' in rel:
            continue
        if '/src/tests/' in rel:
            continue
        # Fix: use a context manager so the handle is closed
        # deterministically (the original left the open file to the GC).
        with file.open('r', encoding='utf-8') as fh:
            counts[rel] = sum(1 for _ in fh)
    return dict(sorted(counts.items()))
|
|
|
|
|
|
2026-04-10 17:00:33 -03:00
|
|
|
def integration_layout_violations() -> list[str]:
    """Flag Rust files living in package-local test directories.

    Files already under `target/` or the sanctioned `testing/` tree are
    exempt. A path like `pkg/src/tests/...` or `pkg/tests/...` violates
    the layout contract.
    """
    found: list[str] = []
    for file in sorted(root.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or rel.startswith('target/') or rel.startswith('testing/'):
            continue
        parts = pathlib.Path(rel).parts
        in_src_tests = len(parts) >= 3 and parts[1] == 'src' and parts[2] == 'tests'
        in_pkg_tests = len(parts) >= 2 and parts[1] == 'tests'
        if in_src_tests:
            found.append(
                f'{rel}: integration tests must live under testing/tests/ instead of package-local src/tests/'
            )
        elif in_pkg_tests:
            found.append(
                f'{rel}: integration tests must live under testing/tests/ instead of package-local tests/'
            )
    return found
|
|
|
|
|
|
|
|
|
|
def testing_contract_violations() -> list[str]:
    """Validate the testing/tests contract.

    Checks, in order: the directory exists, it contains `.rs` files, each
    file stays under 500 LOC, uses a snake_case filename, carries the
    Scope:/Targets:/Why: contract markers within its first 20 lines, and
    contains at least one `#[test]` or `#[tokio::test]` entrypoint.

    Fix: the original opened each file twice — once via an unclosed
    handle just to count lines, then again with read_text. Read once and
    derive the LOC from the text (str.splitlines; equivalent for normal
    newline conventions).
    """
    violations: list[str] = []
    contract_dir = root / 'testing' / 'tests'
    if not contract_dir.exists():
        return ['testing/tests: missing dedicated top-level integration test directory']

    test_files = sorted(contract_dir.rglob('*.rs'))
    if not test_files:
        return ['testing/tests: no integration test files found']

    filename_re = re.compile(r'^[a-z0-9_]+\.rs$')
    required_markers = ('Scope:', 'Targets:', 'Why:')
    for file in test_files:
        rel = repo_relative(str(file))
        if rel is None:
            continue
        text = file.read_text(encoding='utf-8')
        file_lines = text.splitlines()

        loc = len(file_lines)
        if loc > 500:
            violations.append(f'{rel}: exceeds 500 LOC contract ({loc})')
        if not filename_re.match(file.name):
            violations.append(f'{rel}: filename must use snake_case for meaningful modularization')

        # The module contract header must appear in the first 20 lines.
        header = '\n'.join(file_lines[:20])
        for marker in required_markers:
            if marker not in header:
                violations.append(f'{rel}: missing required module contract marker {marker}')
        if '#[test]' not in text and '#[tokio::test]' not in text:
            violations.append(f'{rel}: missing test entrypoints')
    return violations
|
|
|
|
|
|
2026-04-10 15:56:18 -03:00
|
|
|
# Assemble the current per-file metrics: LOC first, then clippy warnings
# and documentation debt merged into the same mapping.
current = {}
for path, loc in source_loc_counts().items():
    current[path] = {'loc': loc}
for path, count in clippy_counts(clippy_path).items():
    current.setdefault(path, {})['clippy_warnings'] = count
for path, count in doc_debt_counts(root).items():
    current.setdefault(path, {})['doc_debt'] = count

# Load the committed baseline; a missing file means every tracked file
# counts as new (and therefore as a regression below).
baseline = {'files': {}}
if baseline_path.exists():
    with baseline_path.open('r', encoding='utf-8') as fh:
        baseline = json.load(fh)
baseline_files = baseline.get('files', {})

# Ratchet check: any metric that grew past its baseline value is a
# regression; equal or lower values pass.
regressions = []
for path, current_entry in current.items():
    baseline_entry = baseline_files.get(path)
    if baseline_entry is None:
        regressions.append(f'{path}: missing baseline entry')
        continue
    for key in ('loc', 'clippy_warnings', 'doc_debt'):
        current_value = int(current_entry.get(key, 0))
        baseline_value = int(baseline_entry.get(key, 0))
        if current_value > baseline_value:
            regressions.append(
                f'{path}: {key} grew from {baseline_value} to {current_value}'
            )
|
2026-04-10 17:00:33 -03:00
|
|
|
# Structural checks are independent of the baseline ratchet.
layout_violations = integration_layout_violations()
testing_violations = testing_contract_violations()

# Aggregate totals for the report header.
totals = {
    'files': len(current),
    'over_500': sum(1 for entry in current.values() if int(entry.get('loc', 0)) > 500),
    'clippy_warnings': sum(int(entry.get('clippy_warnings', 0)) for entry in current.values()),
    'doc_debt': sum(int(entry.get('doc_debt', 0)) for entry in current.values()),
}
|
|
|
|
# Build the human-readable report: header totals, then one table row per
# tracked file. The same text goes to the summary file and stdout.
lines = []
lines.append('hygiene gate report')
lines.append(f"files tracked: {totals['files']}")
lines.append(f"files over 500 LOC: {totals['over_500']}")
lines.append(f"clippy warnings tracked: {totals['clippy_warnings']}")
lines.append(f"non-trivial undocumented functions tracked: {totals['doc_debt']}")
lines.append(f'legacy integration-test layout violations: {len(layout_violations)}')
lines.append(f'testing module contract violations: {len(testing_violations)}')
lines.append('')
lines.append('path | loc | clippy warnings | doc debt | baseline status')
lines.append('-' * 78)
for path in sorted(current):
    entry = current[path]
    baseline_entry = baseline_files.get(path)
    if baseline_entry is None:
        # No baseline entry: report the file as 'new' with n/a baselines.
        status = 'new'
        baseline_loc = 'n/a'
        baseline_clippy = 'n/a'
        baseline_doc = 'n/a'
    else:
        baseline_loc = str(baseline_entry.get('loc', 0))
        baseline_clippy = str(baseline_entry.get('clippy_warnings', 0))
        baseline_doc = str(baseline_entry.get('doc_debt', 0))
        status = 'ok'
        # Any single metric growing past baseline marks the row regressed.
        if (
            int(entry.get('loc', 0)) > int(baseline_entry.get('loc', 0))
            or int(entry.get('clippy_warnings', 0)) > int(baseline_entry.get('clippy_warnings', 0))
            or int(entry.get('doc_debt', 0)) > int(baseline_entry.get('doc_debt', 0))
        ):
            status = 'regressed'
    lines.append(
        f"{path} | {entry.get('loc', 0)} | {entry.get('clippy_warnings', 0)} | {entry.get('doc_debt', 0)} | {baseline_loc}/{baseline_clippy}/{baseline_doc} | {status}"
    )
|
2026-04-10 17:00:33 -03:00
|
|
|
# Append detail sections only when something actually tripped.
if layout_violations:
    lines.append('')
    lines.append('layout violations')
    lines.append('-' * 78)
    lines.extend(layout_violations)

if testing_violations:
    lines.append('')
    lines.append('testing module contract violations')
    lines.append('-' * 78)
    lines.extend(testing_violations)
|
2026-04-10 15:56:18 -03:00
|
|
|
# Persist the report and echo it to stdout for the CI log.
summary_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(summary_path.read_text(encoding='utf-8'))

# Fail the gate when anything regressed or violated a contract; the
# individual findings go to stderr so they stand apart from the report.
if regressions or layout_violations or testing_violations:
    for line in regressions:
        print(line, file=sys.stderr)
    for line in layout_violations:
        print(line, file=sys.stderr)
    for line in testing_violations:
        print(line, file=sys.stderr)
    raise SystemExit(1)
PY
|