triage: limit testing evidence to active suite scope

This commit is contained in:
codex 2026-05-20 05:51:23 -03:00
parent 8c95a1be4f
commit e83ffca7c6
2 changed files with 106 additions and 6 deletions

View File

@ -31,6 +31,24 @@ _JENKINS_TREE = (
# NOTE(review): the consumers of these three limits are outside this excerpt;
# the values are kept exactly as they were.
_MAX_JENKINS_LOG_LINES = 80
_MAX_JENKINS_LOG_CHARS = 12_000
_MAX_EVIDENCE_ITEMS = 12

# Canonical suite names (lower-case, underscore-separated) that triage
# evidence is restricted to. Names are compared after canonicalization.
_IN_SCOPE_TEST_SUITES = frozenset(
    (
        "ananke",
        "ariadne",
        "atlasbot",
        "bstein_home",
        "data_prepper",
        "metis",
        "pegasus",
        "soteria",
        "titan_iac",
    )
)

# Raw (already lower-cased) suite/job names mapped to their canonical name.
# Only "bstein-dev-home" differs from the plain hyphen-to-underscore rewrite;
# the other two entries spell that rewrite out explicitly.
_TEST_SUITE_ALIASES = {
    "bstein-dev-home": "bstein_home",
    "data-prepper": "data_prepper",
    "titan-iac": "titan_iac",
}
@dataclass(frozen=True)
@ -217,7 +235,7 @@ def _quality_signals(errors: list[str]) -> dict[str, Any]:
return {
name: {
"query": query,
"items": _vm_items(query, errors),
"items": _scope_quality_items(_vm_items(query, errors)),
}
for name, query in queries.items()
}
@ -262,7 +280,8 @@ def _jenkins_signals(errors: list[str]) -> dict[str, Any]:
except Exception as exc:
errors.append(f"jenkins: {exc}")
return {"failed_builds": []}
failed = [job for job in jobs if job.get("status") in {"failure", "running", "unknown"}]
scoped_jobs = [job for job in jobs if _suite_in_scope(job.get("job"))]
failed = [job for job in scoped_jobs if job.get("status") in {"failure", "running", "unknown"}]
failed.sort(key=lambda item: -(item.get("last_run_ts") or 0))
for job in failed[:3]:
_attach_jenkins_log_tail(job, errors)
@ -372,7 +391,7 @@ def _summary(
jenkins: dict[str, Any],
errors: list[str],
) -> dict[str, Any]:
failed_suites = sorted(_failed_suites(quality))
failed_suites = sorted(_failed_suites(quality) | _jenkins_suites(jenkins))
problem_count = (
len(cluster.get("flux_not_ready") or [])
+ len(cluster.get("pod_issues") or [])
@ -399,10 +418,46 @@ def _failed_suites(quality: dict[str, Any]) -> set[str]:
labels = item.get("labels") if isinstance(item, dict) else {}
suite = labels.get("suite") if isinstance(labels, dict) else None
if isinstance(suite, str) and suite:
suites.add(suite)
canonical = _canonical_suite_name(suite)
if canonical in _IN_SCOPE_TEST_SUITES:
suites.add(canonical)
return suites
def _jenkins_suites(jenkins: dict[str, Any]) -> set[str]:
    """Collect the in-scope suite names behind the Jenkins failed builds.

    Reads ``jenkins["failed_builds"]`` (a missing or falsy value means no
    builds), ignores entries that are not dicts, canonicalizes each entry's
    ``"job"`` value and keeps only names found in ``_IN_SCOPE_TEST_SUITES``.
    """
    builds = jenkins.get("failed_builds") or []
    canonical_names = (
        _canonical_suite_name(build.get("job"))
        for build in builds
        if isinstance(build, dict)
    )
    return {name for name in canonical_names if name in _IN_SCOPE_TEST_SUITES}
def _scope_quality_items(items: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the items accepted by ``_quality_item_in_scope``, order preserved."""
    return list(filter(_quality_item_in_scope, items))
def _quality_item_in_scope(item: dict[str, Any]) -> bool:
    """Decide whether a quality item should be kept as evidence.

    The suite is taken from the first truthy label among ``suite``,
    ``exported_job`` and ``job``. Items that cannot be attributed to a suite
    (no labels dict, or none of those labels set) are kept; attributed items
    are kept only when ``_suite_in_scope`` accepts the label value.
    """
    if isinstance(item, dict):
        labels = item.get("labels")
        if not isinstance(labels, dict):
            # Missing or malformed labels: nothing to filter on, so keep it.
            return True
    else:
        labels = {}

    for label_name in ("suite", "exported_job", "job"):
        raw_suite = labels.get(label_name)
        if raw_suite:
            return _suite_in_scope(raw_suite)
    return True
def _suite_in_scope(value: Any) -> bool:
    """Report whether ``value`` canonicalizes to a name in ``_IN_SCOPE_TEST_SUITES``."""
    canonical = _canonical_suite_name(value)
    return canonical in _IN_SCOPE_TEST_SUITES
def _canonical_suite_name(value: Any) -> str:
    """Normalize a raw suite or job name to its canonical suite name.

    The value is stringified (falsy values become ``""``), trimmed,
    lower-cased, reduced to its last ``/``-separated segment, and has hyphens
    rewritten to underscores. If the result matches a key of
    ``_TEST_SUITE_ALIASES`` (compared with hyphens and underscores treated
    alike), the alias target is returned instead.

    Args:
        value: Raw name, e.g. a metric label value or a Jenkins job name such
            as ``"folder/ariadne"``. Any type is accepted.

    Returns:
        The canonical name; ``""`` when ``value`` is falsy or has no usable
        segment.
    """
    # Drop surrounding whitespace and path separators first so a trailing
    # slash ("folder/ariadne/") does not leave an empty final segment.
    name = str(value or "").strip().lower().strip("/")
    leaf = name.rsplit("/", 1)[-1].strip()
    normalized = leaf.replace("-", "_")

    # Match aliases separator-insensitively, the same way non-alias names are
    # handled: "bstein_dev_home" must resolve like "bstein-dev-home".
    for alias, canonical in _TEST_SUITE_ALIASES.items():
        if alias.replace("-", "_") == normalized:
            return canonical
    return normalized
def _render_markdown(bundle: dict[str, Any]) -> str:
summary = bundle.get("summary") if isinstance(bundle.get("summary"), dict) else {}
evidence = bundle.get("evidence") if isinstance(bundle.get("evidence"), dict) else {}

View File

@ -436,6 +436,51 @@ def test_jenkins_signals_attaches_recent_failed_builds(monkeypatch) -> None:
assert attached == ["running", "unknown", "old"]
def test_jenkins_signals_filters_to_in_scope_suite_jobs(monkeypatch) -> None:
    """Out-of-scope Jenkins jobs are neither reported nor given a log tail."""
    out_of_scope = [
        {"job": "lesavka", "status": "running", "last_run_ts": 50},
        {"job": "harbor-arm-build", "status": "failure", "last_run_ts": 40},
    ]
    in_scope = [
        {"job": "data-prepper", "status": "running", "last_run_ts": 30},
        {"job": "folder/ariadne", "status": "failure", "last_run_ts": 20},
        {"job": "bstein-dev-home", "status": "unknown", "last_run_ts": 10},
    ]
    jobs = [*out_of_scope, *in_scope]
    # Already in descending last_run_ts order, which is how results are sorted.
    expected_jobs = ["data-prepper", "folder/ariadne", "bstein-dev-home"]
    attached: list[str] = []

    def fake_fetch(base_url):
        return jobs

    def fake_attach(job, errors):
        attached.append(job["job"])

    monkeypatch.setattr(testing_triage, "settings", SettingsStub(jenkins_base_url="http://jenkins"))
    monkeypatch.setattr(testing_triage, "_fetch_jenkins_jobs", fake_fetch)
    monkeypatch.setattr(testing_triage, "_attach_jenkins_log_tail", fake_attach)

    signals = testing_triage._jenkins_signals([])  # noqa: SLF001
    reported_jobs = [item["job"] for item in signals["failed_builds"]]

    assert reported_jobs == expected_jobs
    assert attached == expected_jobs
def test_quality_signals_filters_to_in_scope_suites(monkeypatch) -> None:
    """Quality items are kept only for in-scope suites or when unlabelled."""

    def row(**labels):
        # Fresh dict on every call so expected rows never alias the input rows.
        return {"labels": labels, "value": 1.0}

    rows = [
        row(suite="ariadne"),
        row(suite="lesavka"),
        row(suite="typhon"),
        row(exported_job="titan-iac"),
        row(exported_job="harbor-arm-build"),
        row(),
    ]
    expected_items = [
        row(suite="ariadne"),
        row(exported_job="titan-iac"),
        row(),
    ]

    def fake_vm_items(query, errors):
        return rows

    monkeypatch.setattr(testing_triage, "settings", SettingsStub(vm_url="http://vm"))
    monkeypatch.setattr(testing_triage, "_vm_items", fake_vm_items)

    quality = testing_triage._quality_signals([])  # noqa: SLF001

    assert quality["failed_runs_24h"]["items"] == expected_items
    # Only the "suite" label feeds the failed-suite set, so titan-iac is absent.
    assert testing_triage._failed_suites(quality) == {"ariadne"}  # noqa: SLF001
def test_summary_and_markdown_helpers() -> None:
quality = {
"failed_runs_24h": {"items": [{"labels": {"suite": "ariadne"}, "value": 2}]},
@ -448,7 +493,7 @@ def test_summary_and_markdown_helpers() -> None:
"jobs_failing": [{"job": "job"}],
"collected_at": "now",
}
jenkins = {"failed_builds": [{"job": "ariadne"}]}
jenkins = {"failed_builds": [{"job": "bstein-dev-home"}]}
summary = testing_triage._summary(cluster, quality, jenkins, []) # noqa: SLF001
markdown = testing_triage._render_markdown( # noqa: SLF001
@ -461,7 +506,7 @@ def test_summary_and_markdown_helpers() -> None:
)
assert summary["status"] == "needs_attention"
assert summary["failed_suites"] == ["ariadne"]
assert summary["failed_suites"] == ["ariadne", "bstein_home"]
assert "- Flux: monitoring" in markdown
assert "- failed_runs_24h: {'suite': 'ariadne'} value=2" in markdown
assert "- missing vm" in markdown