atlasbot: enforce evidence in answers

2026-01-31 22:27:32 -03:00 · 2026-01-31 22:27:32 -03:00 · 81e2c65a21
commit 81e2c65a21
parent c1f1ef23a6
2 changed files with 59 additions and 2 deletions
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@ -121,6 +121,7 @@ class AnswerEngine:
            "tool",
            "followup",
            "select_claims",
+            "evidence_fix",
        }

        def _debug_log(name: str, payload: Any) -> None:
@ -274,6 +275,24 @@ class AnswerEngine:
                observer("synthesize", "synthesizing")
            reply = await self._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)

+            if snapshot_context and _needs_evidence_fix(reply, classify):
+                if observer:
+                    observer("evidence_fix", "repairing missing evidence")
+                fix_prompt = (
+                    prompts.EVIDENCE_FIX_PROMPT
+                    + "\nQuestion: "
+                    + normalized
+                    + "\nDraft: "
+                    + reply
+                )
+                reply = await call_llm(
+                    prompts.EVIDENCE_FIX_SYSTEM,
+                    fix_prompt,
+                    context=context,
+                    model=plan.model,
+                    tag="evidence_fix",
+                )
+
            if plan.use_critic:
                if observer:
                    observer("critic", "reviewing")
@ -766,6 +785,29 @@ def _default_scores() -> AnswerScores:
    return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")


+def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:  # True when the draft reply should get the evidence-repair pass
+    if not reply:
+        return False  # empty draft: nothing to inspect
+    lowered = reply.lower()  # markers below are lowercase, so matching is case-insensitive
+    missing_markers = (  # substrings taken as a sign that the draft claims data is unavailable
+        "don't have",  # NOTE(review): ASCII apostrophe only; a typographic apostrophe in the reply will not match
+        "do not have",
+        "don't know",
+        "cannot",
+        "can't",
+        "need to",  # NOTE(review): plain substring test, so this also fires on advice such as "you need to restart"
+        "would need",
+        "not provided",
+        "missing",
+        "no specific",
+    )
+    if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):  # marker check applies only when needs_snapshot is truthy
+        return True
+    if classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply):  # metric/diagnostic reply containing no digit at all
+        return True
+    return False
+
+
 def _resolve_path(data: Any, path: str) -> Any | None:
    cursor = data
    for part in re.split(r"\.(?![^\[]*\])", path):
--- a/atlasbot/llm/prompts.py
+++ b/atlasbot/llm/prompts.py
@ -69,11 +69,13 @@ ANSWER_SYSTEM = (
    CLUSTER_SYSTEM
    + " Answer a focused sub-question using the provided context. "
    + "Be concise and grounded. "
+    + "If the context contains explicit values relevant to the question, you must use them."
 )

 SUBANSWER_PROMPT = (
    "Answer the sub-question using the context. "
-    "If context lacks the fact, say so."
+    "If the context includes the fact, state it explicitly. "
+    "Only say the fact is missing if it truly is not present."
 )

 SYNTHESIZE_SYSTEM = (
@ -87,6 +89,19 @@ SYNTHESIZE_PROMPT = (
    "Use sub-answers as evidence, avoid raw metric dumps unless asked."
 )

+EVIDENCE_FIX_SYSTEM = (  # passed as the first argument to call_llm by the evidence_fix step in answerer.py
+    CLUSTER_SYSTEM
+    + " Rewrite the draft answer if it ignored facts present in the context. "
+    + "Only use facts in the provided context."
+)
+
+EVIDENCE_FIX_PROMPT = (  # answerer.py appends "\nQuestion: " + the question and "\nDraft: " + the draft reply to this text
+    "Check the draft against the context. "
+    "If the draft says data is missing but the context includes relevant values, "
+    "rewrite the answer to include those values. "
+    "If data is truly missing, keep the draft concise and honest."
+)
+
 DRAFT_SELECT_PROMPT = (
    "Pick the best draft for accuracy, clarity, and helpfulness. "
    "Return JSON with field: best (1-based index)."