2026-04-10 15:56:18 -03:00
|
|
|
#!/usr/bin/env bash
# Hygiene gate: run clippy over the workspace, then compare per-file
# LOC / warning / doc-debt metrics against a committed baseline via the
# embedded Python analyzer below. Exits non-zero on any regression.
set -euo pipefail

# Repo root is two directories up from this script's location.
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)
REPORT_DIR="${ROOT_DIR}/target/hygiene-gate"
CLIPPY_JSON="${REPORT_DIR}/clippy.json"
SUMMARY_TXT="${REPORT_DIR}/summary.txt"
# Committed baseline the current metrics are ratcheted against.
BASELINE_JSON="${ROOT_DIR}/scripts/ci/hygiene_gate_baseline.json"

mkdir -p "${REPORT_DIR}"

# Capture clippy's machine-readable diagnostics; pedantic lints are
# surfaced as warnings so the analyzer can count them per file.
cargo clippy --workspace --all-targets --message-format json -- -W clippy::pedantic >"${CLIPPY_JSON}"

# Hand the report/baseline paths and repo root to the analyzer as argv.
python3 - "${CLIPPY_JSON}" "${BASELINE_JSON}" "${SUMMARY_TXT}" "${ROOT_DIR}" <<'PY'
|
|
|
|
|
import json
|
|
|
|
|
import pathlib
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
2026-04-10 17:00:33 -03:00
|
|
|
from collections import defaultdict
|
2026-04-10 15:56:18 -03:00
|
|
|
|
|
|
|
|
# Positional arguments handed over by the wrapping shell script:
# clippy JSON output, baseline JSON, summary output path, repo root.
clippy_path = pathlib.Path(sys.argv[1])
baseline_path = pathlib.Path(sys.argv[2])
summary_path = pathlib.Path(sys.argv[3])
root = pathlib.Path(sys.argv[4])

# First line of a Rust `fn` item, allowing optional visibility
# (`pub`, `pub(crate)`, ...), `async`, and `unsafe` qualifiers.
fn_re = re.compile(r'^\s*(?:pub(?:\([^)]+\))?\s+)?(?:async\s+)?(?:unsafe\s+)?fn\s+\w+')
|
|
|
|
|
|
|
|
|
|
def load_json_lines(path: pathlib.Path):
    """Yield parsed objects from a JSON-lines file.

    Blank lines and lines that fail to parse as JSON are skipped
    silently (cargo interleaves non-JSON output on occasion).
    """
    for line in path.read_text(encoding='utf-8').splitlines():
        candidate = line.strip()
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        yield parsed
|
|
|
|
|
|
|
|
|
|
def repo_relative(path: str) -> str | None:
|
|
|
|
|
try:
|
|
|
|
|
return pathlib.Path(path).resolve().relative_to(root).as_posix()
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def clippy_counts(path: pathlib.Path) -> dict[str, int]:
    """Tally clippy warnings per repo-relative source file.

    Only `compiler-message` records at warning level are counted, and
    only for files under a `src/` directory that are neither build
    artifacts (`target/`) nor package-local tests (`src/tests/`).
    """
    tally: dict[str, int] = defaultdict(int)
    for record in load_json_lines(path):
        if record.get('reason') != 'compiler-message':
            continue
        msg = record.get('message', {})
        if msg.get('level') != 'warning':
            continue
        span_list = msg.get('spans') or []
        # Prefer the primary span; fall back to the first span if none
        # is flagged primary.
        chosen = None
        for span in span_list:
            if span.get('is_primary'):
                chosen = span
                break
        if chosen is None and span_list:
            chosen = span_list[0]
        if not chosen:
            continue
        rel = repo_relative(chosen.get('file_name', ''))
        if rel is None:
            continue
        if '/src/' not in rel or '/target/' in rel or '/src/tests/' in rel:
            continue
        tally[rel] += 1
    return dict(sorted(tally.items()))
|
|
|
|
|
|
|
|
|
|
def function_blocks(lines: list[str]):
    """Yield (start_line, end_line, doc_ok, non_trivial) for each Rust fn.

    Line numbers in the yielded tuples are 1-based. ``doc_ok`` is True
    when the nearest non-blank line above the `fn` is a doc comment
    (`///` or `#[doc =`). ``non_trivial`` is a heuristic: a body of 12+
    non-blank lines, or any control-flow-looking token in the block text.

    NOTE(review): this is textual brace counting, not parsing — braces
    inside string literals or comments are counted too, and a bodiless
    `fn` declaration (e.g. a trait method signature ending in `;`) never
    opens a brace, so the inner scan would run to end-of-input. Presumed
    acceptable for the repository's actual sources — confirm.
    """
    index = 0
    while index < len(lines):
        if not fn_re.match(lines[index]):
            index += 1
            continue

        start = index
        doc_ok = False
        prev = index - 1
        # Walk back over blank lines to find the closest preceding content.
        while prev >= 0 and not lines[prev].strip():
            prev -= 1
        if prev >= 0:
            stripped = lines[prev].lstrip()
            doc_ok = stripped.startswith('///') or stripped.startswith('#[doc =')

        # Scan forward, tracking net brace depth; the block ends on the
        # line where depth returns to zero after the first '{' was seen.
        brace_depth = 0
        seen_open = False
        body_lines = 0
        j = index
        while j < len(lines):
            text = lines[j]
            brace_depth += text.count('{') - text.count('}')
            if '{' in text:
                seen_open = True
            if seen_open and text.strip():
                body_lines += 1
            if seen_open and brace_depth <= 0:
                break
            j += 1

        block_text = '\n'.join(lines[start:j + 1])
        # '?.' approximates use of the `?` operator in a method chain.
        non_trivial = body_lines >= 12 or any(token in block_text for token in (' if ', ' match ', ' for ', ' while ', ' loop ', '?.'))
        yield start + 1, j + 1, doc_ok, non_trivial
        index = j + 1
|
|
|
|
|
|
|
|
|
|
def doc_debt_counts(path: pathlib.Path) -> dict[str, int]:
    """Count non-trivial undocumented functions per tracked source file.

    ``path`` is the directory tree to scan. Fix: the original accepted
    ``path`` but ignored it and always scanned the module-level ``root``;
    this version honors the parameter. Behavior is unchanged for the
    existing call site, which passes ``root``.

    Filtering matches clippy_counts: only files under `src/`, excluding
    `target/` artifacts and package-local `src/tests/`.
    """
    counts: dict[str, int] = defaultdict(int)
    for file in sorted(path.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or '/src/' not in rel or '/target/' in rel:
            continue
        if '/src/tests/' in rel:
            continue
        lines = file.read_text(encoding='utf-8').splitlines()
        debt = 0
        for _, _, doc_ok, non_trivial in function_blocks(lines):
            if non_trivial and not doc_ok:
                debt += 1
        # Record even a zero so every tracked file appears in the baseline.
        counts[rel] = debt
    return dict(sorted(counts.items()))
|
|
|
|
|
|
|
|
|
|
def source_loc_counts() -> dict[str, int]:
    """Count physical lines per tracked `.rs` file under the repo root.

    Uses the same filtering as the other metric collectors: only files
    under `src/`, excluding `target/` and package-local `src/tests/`.
    """
    counts: dict[str, int] = {}
    for file in sorted(root.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or '/src/' not in rel or '/target/' in rel:
            continue
        if '/src/tests/' in rel:
            continue
        # Fix: use a context manager so the handle is closed
        # deterministically (the original left the open file to the GC).
        with file.open('r', encoding='utf-8') as fh:
            counts[rel] = sum(1 for _ in fh)
    return dict(sorted(counts.items()))
|
|
|
|
|
|
2026-04-10 17:00:33 -03:00
|
|
|
def integration_layout_violations() -> list[str]:
    """Flag Rust files living in package-local test directories.

    Files already under `target/` or the sanctioned `testing/` tree are
    exempt. A path like `pkg/src/tests/...` or `pkg/tests/...` violates
    the layout contract.
    """
    found: list[str] = []
    for file in sorted(root.rglob('*.rs')):
        rel = repo_relative(str(file))
        if rel is None or rel.startswith('target/') or rel.startswith('testing/'):
            continue
        parts = pathlib.Path(rel).parts
        in_src_tests = len(parts) >= 3 and parts[1] == 'src' and parts[2] == 'tests'
        in_pkg_tests = len(parts) >= 2 and parts[1] == 'tests'
        if in_src_tests:
            found.append(
                f'{rel}: integration tests must live under testing/tests/ instead of package-local src/tests/'
            )
        elif in_pkg_tests:
            found.append(
                f'{rel}: integration tests must live under testing/tests/ instead of package-local tests/'
            )
    return found
|
|
|
|
|
|
|
|
|
|
def testing_contract_violations() -> list[str]:
    """Validate the testing/tests contract.

    Checks, in order: the directory exists, it contains `.rs` files, each
    file stays under 500 LOC, uses a snake_case filename, carries the
    Scope:/Targets:/Why: contract markers within its first 20 lines, and
    contains at least one `#[test]` or `#[tokio::test]` entrypoint.

    Fix: the original opened each file twice — once via an unclosed
    handle just to count lines, then again with read_text. Read once and
    derive the LOC from the text (str.splitlines; equivalent for normal
    newline conventions).
    """
    violations: list[str] = []
    contract_dir = root / 'testing' / 'tests'
    if not contract_dir.exists():
        return ['testing/tests: missing dedicated top-level integration test directory']

    test_files = sorted(contract_dir.rglob('*.rs'))
    if not test_files:
        return ['testing/tests: no integration test files found']

    filename_re = re.compile(r'^[a-z0-9_]+\.rs$')
    required_markers = ('Scope:', 'Targets:', 'Why:')
    for file in test_files:
        rel = repo_relative(str(file))
        if rel is None:
            continue
        text = file.read_text(encoding='utf-8')
        file_lines = text.splitlines()

        loc = len(file_lines)
        if loc > 500:
            violations.append(f'{rel}: exceeds 500 LOC contract ({loc})')
        if not filename_re.match(file.name):
            violations.append(f'{rel}: filename must use snake_case for meaningful modularization')

        # The module contract header must appear in the first 20 lines.
        header = '\n'.join(file_lines[:20])
        for marker in required_markers:
            if marker not in header:
                violations.append(f'{rel}: missing required module contract marker {marker}')
        if '#[test]' not in text and '#[tokio::test]' not in text:
            violations.append(f'{rel}: missing test entrypoints')
    return violations
|
|
|
|
|
|
2026-04-10 15:56:18 -03:00
|
|
|
# Assemble the current per-file metrics: LOC first, then clippy warnings
# and documentation debt merged into the same mapping.
current = {}
for path, loc in source_loc_counts().items():
    current[path] = {'loc': loc}
for path, count in clippy_counts(clippy_path).items():
    current.setdefault(path, {})['clippy_warnings'] = count
for path, count in doc_debt_counts(root).items():
    current.setdefault(path, {})['doc_debt'] = count

# Load the committed baseline; a missing file means every tracked file
# counts as new (and therefore as a regression below).
baseline = {'files': {}}
if baseline_path.exists():
    with baseline_path.open('r', encoding='utf-8') as fh:
        baseline = json.load(fh)
baseline_files = baseline.get('files', {})

# Ratchet check: any metric that grew past its baseline value is a
# regression; equal or lower values pass.
regressions = []
for path, current_entry in current.items():
    baseline_entry = baseline_files.get(path)
    if baseline_entry is None:
        regressions.append(f'{path}: missing baseline entry')
        continue
    for key in ('loc', 'clippy_warnings', 'doc_debt'):
        current_value = int(current_entry.get(key, 0))
        baseline_value = int(baseline_entry.get(key, 0))
        if current_value > baseline_value:
            regressions.append(
                f'{path}: {key} grew from {baseline_value} to {current_value}'
            )
|
2026-04-10 17:00:33 -03:00
|
|
|
# Structural checks are independent of the baseline ratchet.
layout_violations = integration_layout_violations()
testing_violations = testing_contract_violations()

# Aggregate totals for the report header.
totals = {
    'files': len(current),
    'over_500': sum(1 for entry in current.values() if int(entry.get('loc', 0)) > 500),
    'clippy_warnings': sum(int(entry.get('clippy_warnings', 0)) for entry in current.values()),
    'doc_debt': sum(int(entry.get('doc_debt', 0)) for entry in current.values()),
}
|
|
|
|
# Build the human-readable report: header totals, then one table row per
# tracked file. The same text goes to the summary file and stdout.
lines = []
lines.append('hygiene gate report')
lines.append(f"files tracked: {totals['files']}")
lines.append(f"files over 500 LOC: {totals['over_500']}")
lines.append(f"clippy warnings tracked: {totals['clippy_warnings']}")
lines.append(f"non-trivial undocumented functions tracked: {totals['doc_debt']}")
lines.append(f'legacy integration-test layout violations: {len(layout_violations)}')
lines.append(f'testing module contract violations: {len(testing_violations)}')
lines.append('')
lines.append('path | loc | clippy warnings | doc debt | baseline status')
lines.append('-' * 78)
for path in sorted(current):
    entry = current[path]
    baseline_entry = baseline_files.get(path)
    if baseline_entry is None:
        # No baseline entry: report the file as 'new' with n/a baselines.
        status = 'new'
        baseline_loc = 'n/a'
        baseline_clippy = 'n/a'
        baseline_doc = 'n/a'
    else:
        baseline_loc = str(baseline_entry.get('loc', 0))
        baseline_clippy = str(baseline_entry.get('clippy_warnings', 0))
        baseline_doc = str(baseline_entry.get('doc_debt', 0))
        status = 'ok'
        # Any single metric growing past baseline marks the row regressed.
        if (
            int(entry.get('loc', 0)) > int(baseline_entry.get('loc', 0))
            or int(entry.get('clippy_warnings', 0)) > int(baseline_entry.get('clippy_warnings', 0))
            or int(entry.get('doc_debt', 0)) > int(baseline_entry.get('doc_debt', 0))
        ):
            status = 'regressed'
    lines.append(
        f"{path} | {entry.get('loc', 0)} | {entry.get('clippy_warnings', 0)} | {entry.get('doc_debt', 0)} | {baseline_loc}/{baseline_clippy}/{baseline_doc} | {status}"
    )
|
2026-04-10 17:00:33 -03:00
|
|
|
# Append detail sections only when something actually tripped.
if layout_violations:
    lines.append('')
    lines.append('layout violations')
    lines.append('-' * 78)
    lines.extend(layout_violations)

if testing_violations:
    lines.append('')
    lines.append('testing module contract violations')
    lines.append('-' * 78)
    lines.extend(testing_violations)
|
2026-04-10 15:56:18 -03:00
|
|
|
# Persist the report and echo it to stdout for the CI log.
summary_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
print(summary_path.read_text(encoding='utf-8'))

# Fail the gate when anything regressed or violated a contract; the
# individual findings go to stderr so they stand apart from the report.
if regressions or layout_violations or testing_violations:
    for line in regressions:
        print(line, file=sys.stderr)
    for line in layout_violations:
        print(line, file=sys.stderr)
    for line in testing_violations:
        print(line, file=sys.stderr)
    raise SystemExit(1)
PY
|