From ac91e16f16701a8074ed891f5e4f2ed1192cb0ca Mon Sep 17 00:00:00 2001 From: codex Date: Wed, 20 May 2026 04:57:58 -0300 Subject: [PATCH] triage: sanitize local model diagnoses --- ariadne/services/testing_triage_diagnosis.py | 127 ++++++++++++++++++- tests/test_testing_triage_diagnosis.py | 42 ++++++ 2 files changed, 162 insertions(+), 7 deletions(-) diff --git a/ariadne/services/testing_triage_diagnosis.py b/ariadne/services/testing_triage_diagnosis.py index 0076dc9..46f68f8 100644 --- a/ariadne/services/testing_triage_diagnosis.py +++ b/ariadne/services/testing_triage_diagnosis.py @@ -19,8 +19,9 @@ _DIAGNOSIS_SYSTEM_PROMPT = ( "or summarize the evidence as nested input data. Confidence must be low, medium, or " "high. Next actions must be read-only verification or Flux/IaC changes; never suggest " "mutating kubectl commands or reading Kubernetes Secret values. If evidence is " - "insufficient, say that plainly in root_cause. Use normal English words with spaces; " - "do not concatenate words." + "insufficient, say that plainly in root_cause. All response values must be written " + "in English using ASCII text only. Never answer in Chinese or any other non-English " + "language. Use normal English words with spaces; do not concatenate words." ) _DIAGNOSIS_RESPONSE_SCHEMA: dict[str, Any] = { "type": "object", @@ -132,6 +133,8 @@ def _diagnosis_prompt(bundle: dict[str, Any]) -> str: "Return only headline, root_cause, blast_radius, confidence, needs_human, next_actions, evidence_refs.", "Do not include keys named pipelines, quality, bundle, evidence, summary, or unknowns.", "Prefer concrete Jenkins job names, Flux Kustomizations, pod names, nodes, and metrics when present.", + "Only discuss Jenkins suite jobs listed in bundle.summary.failed_suites; ignore stale unrelated jobs.", + "Write every string in English ASCII only.", ], "bundle": _model_evidence_payload(bundle), } @@ -173,14 +176,35 @@ def _diagnosis_from_model( unknowns = list(bundle.get("unknowns") or []) if isinstance(bundle.get("unknowns"), list) else [] if parse_error: unknowns.append(parse_error) + blocked_jobs = _out_of_scope_jobs(parsed, summary) diagnosis = { - "headline": _text_value(parsed.get("headline"), "Testing triage needs review."), - "root_cause": _text_value(parsed.get("root_cause"), "Evidence is insufficient for a confident root cause."), - "blast_radius": _text_value(parsed.get("blast_radius"), _blast_radius_fallback(summary)), + "headline": _safe_text_value( + parsed.get("headline"), + "Testing triage needs review.", + unknowns, + "headline", + blocked_jobs, + ), + "root_cause": _safe_text_value( + parsed.get("root_cause"), + "Evidence is insufficient for a confident root cause.", + unknowns, + "root_cause", + blocked_jobs, + ), + "blast_radius": _safe_text_value( + parsed.get("blast_radius"), + _blast_radius_fallback(summary), + unknowns, + "blast_radius", + blocked_jobs, + ), "confidence": _confidence(parsed.get("confidence")), "needs_human": _bool_value(parsed.get("needs_human"), bool(summary.get("problem_count"))), - "next_actions": _text_list(parsed.get("next_actions")) or _default_next_actions(summary), - "evidence_refs": _text_list(parsed.get("evidence_refs")) or _default_evidence_refs(summary), + "next_actions": _safe_text_list(parsed.get("next_actions"), unknowns, "next_actions", blocked_jobs) + or _default_next_actions(summary), + "evidence_refs": _safe_evidence_refs(parsed.get("evidence_refs"), summary, unknowns) + or _default_evidence_refs(summary), } return { "kind": "testing_triage_diagnosis", @@ -239,6 +263,23 @@ def _text_value(value: Any, default: str) -> str: return default +def _safe_text_value( + value: Any, + default: str, + unknowns: list[Any], + field: str, + blocked_jobs: set[str], +) -> str: + text = _text_value(value, default) + if not _english_ascii(text): + unknowns.append(f"model_{field}_non_english") + return default + if _mentions_blocked_job(text, blocked_jobs): + unknowns.append(f"model_{field}_out_of_scope") + return default + return text + + def _text_list(value: Any) -> list[str]: if isinstance(value, str) and value.strip(): return [value.strip()] @@ -247,6 +288,78 @@ def _text_list(value: Any) -> list[str]: return [str(item).strip() for item in value if str(item).strip()][:8] +def _safe_text_list(value: Any, unknowns: list[Any], field: str, blocked_jobs: set[str]) -> list[str]: + values = [] + for item in _text_list(value): + if not _english_ascii(item): + unknowns.append(f"model_{field}_non_english") + continue + if _mentions_blocked_job(item, blocked_jobs): + unknowns.append(f"model_{field}_out_of_scope") + continue + values.append(item) + return values + + +def _safe_evidence_refs(value: Any, summary: dict[str, Any], unknowns: list[Any]) -> list[str]: + refs = [] + for ref in _text_list(value): + if not _english_ascii(ref): + unknowns.append("model_evidence_refs_non_english") + continue + if not _evidence_ref_in_scope(ref, summary): + unknowns.append("model_evidence_refs_out_of_scope") + continue + refs.append(ref) + return refs + + +def _english_ascii(text: str) -> bool: + return all(ord(char) < 128 for char in text) + + +def _mentions_blocked_job(text: str, blocked_jobs: set[str]) -> bool: + lowered = text.lower() + return any(job.lower() in lowered for job in blocked_jobs) + + +def _out_of_scope_jobs(parsed: dict[str, Any], summary: dict[str, Any]) -> set[str]: + allowed = _allowed_suite_jobs(summary) + jobs = {_job_name_from_ref(ref) for ref in _text_list(parsed.get("evidence_refs"))} + return {job for job in jobs if job and job not in allowed} + + +def _evidence_ref_in_scope(ref: str, summary: dict[str, Any]) -> bool: + job = _job_name_from_ref(ref) + return not job or job in _allowed_suite_jobs(summary) + + +def _job_name_from_ref(ref: str) -> str: + marker = "/job/" + if marker not in ref: + return "" + return ref.split(marker, 1)[1].split("/", 1)[0].strip().lower() + + +def _allowed_suite_jobs(summary: dict[str, Any]) -> set[str]: + failed_suites = summary.get("failed_suites") if isinstance(summary.get("failed_suites"), list) else [] + aliases = { + "bstein_home": "bstein-dev-home", + "data_prepper": "data-prepper", + "titan_iac": "titan-iac", + } + allowed = set() + for suite in failed_suites: + name = str(suite).strip().lower() + if not name: + continue + allowed.add(name) + allowed.add(name.replace("_", "-")) + if name in aliases: + allowed.add(aliases[name]) + return allowed + + def _confidence(value: Any) -> str: confidence = str(value or "").strip().lower() return confidence if confidence in {"low", "medium", "high"} else "low" diff --git a/tests/test_testing_triage_diagnosis.py b/tests/test_testing_triage_diagnosis.py index 05b8b1f..2b5ef47 100644 --- a/tests/test_testing_triage_diagnosis.py +++ b/tests/test_testing_triage_diagnosis.py @@ -261,6 +261,48 @@ def test_diagnosis_from_model_coerces_fallback_values(monkeypatch) -> None: assert diagnosis["unknowns"] == ["existing_unknown", "parse warning"] +def test_diagnosis_from_model_rejects_non_english_and_out_of_scope_jobs(monkeypatch) -> None: + monkeypatch.setattr(testing_triage_diagnosis, "settings", SettingsStub(testing_triage_model="triage-model")) + diagnosis = testing_triage_diagnosis._diagnosis_from_model( # noqa: SLF001 + { + "generated_at": "bundle-time", + "summary": { + "status": "needs_attention", + "problem_count": 2, + "failed_suites": ["titan_iac"], + }, + "unknowns": [], + }, + { + "headline": "多项目持续集成状态更新", + "root_cause": "arcanagon is stale and titan_iac failed.", + "blast_radius": "arcanagon and titan_iac", + "confidence": "high", + "needs_human": True, + "next_actions": [ + "检查 titan_iac logs.", + "Review arcanagon logs.", + "Review titan_iac logs.", + ], + "evidence_refs": [ + "https://ci.bstein.dev/job/arcanagon/1/consoleText", + "https://ci.bstein.dev/job/titan-iac/463/consoleText", + ], + }, + "raw response", + None, + ) + + assert diagnosis["diagnosis"]["headline"] == "Testing triage needs review." + assert diagnosis["diagnosis"]["root_cause"] == "Evidence is insufficient for a confident root cause." + assert diagnosis["diagnosis"]["blast_radius"] == "titan_iac" + assert diagnosis["diagnosis"]["next_actions"] == ["Review titan_iac logs."] + assert diagnosis["diagnosis"]["evidence_refs"] == ["https://ci.bstein.dev/job/titan-iac/463/consoleText"] + assert "model_headline_non_english" in diagnosis["unknowns"] + assert "model_root_cause_out_of_scope" in diagnosis["unknowns"] + assert "model_evidence_refs_out_of_scope" in diagnosis["unknowns"] + + def test_default_evidence_refs_include_failed_suites() -> None: refs = testing_triage_diagnosis._default_evidence_refs( # noqa: SLF001 {"status": "needs_attention", "problem_count": 3, "failed_suites": ["a", "b", "c", "d", "e", "f", "g"]}