atlasbot: enforce evidence in answers

2026-01-31 22:27:32 -03:00 · 2026-01-31 22:27:32 -03:00 · 81e2c65a21
commit 81e2c65a21
parent c1f1ef23a6
2 changed files with 59 additions and 2 deletions
--- a/atlasbot/engine/answerer.py
+++ b/atlasbot/engine/answerer.py
@ -121,6 +121,7 @@ class AnswerEngine:
            "tool",
            "followup",
            "select_claims",
            "evidence_fix",
        }
        def _debug_log(name: str, payload: Any) -> None:
@ -274,6 +275,24 @@ class AnswerEngine:
                observer("synthesize", "synthesizing")
            reply = await self._synthesize_answer(normalized, subanswers, context, classify, plan, call_llm)
            if snapshot_context and _needs_evidence_fix(reply, classify):
                if observer:
                    observer("evidence_fix", "repairing missing evidence")
                fix_prompt = (
                    prompts.EVIDENCE_FIX_PROMPT
                    + "\nQuestion: "
                    + normalized
                    + "\nDraft: "
                    + reply
                )
                reply = await call_llm(
                    prompts.EVIDENCE_FIX_SYSTEM,
                    fix_prompt,
                    context=context,
                    model=plan.model,
                    tag="evidence_fix",
                )
            if plan.use_critic:
                if observer:
                    observer("critic", "reviewing")
@ -766,6 +785,29 @@ def _default_scores() -> AnswerScores:
    return AnswerScores(confidence=60, relevance=60, satisfaction=60, hallucination_risk="medium")
 def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
    if not reply:
        return False
    lowered = reply.lower()
    missing_markers = (
        "don't have",
        "do not have",
        "don't know",
        "cannot",
        "can't",
        "need to",
        "would need",
        "not provided",
        "missing",
        "no specific",
    )
    if classify.get("needs_snapshot") and any(marker in lowered for marker in missing_markers):
        return True
    if classify.get("question_type") in {"metric", "diagnostic"} and not re.search(r"\d", reply):
        return True
    return False
 def _resolve_path(data: Any, path: str) -> Any | None:
    cursor = data
    for part in re.split(r"\.(?![^\[]*\])", path):
--- a/atlasbot/llm/prompts.py
+++ b/atlasbot/llm/prompts.py
@ -68,12 +68,14 @@ TOOL_PROMPT = (
 ANSWER_SYSTEM = (
    CLUSTER_SYSTEM
    + " Answer a focused sub-question using the provided context. "
-    + "Be concise and grounded."
+    + "Be concise and grounded. "
    + "If the context contains explicit values relevant to the question, you must use them."
 )
 SUBANSWER_PROMPT = (
    "Answer the sub-question using the context. "
-    "If context lacks the fact, say so."
+    "If the context includes the fact, state it explicitly. "
    "Only say the fact is missing if it truly is not present."
 )
 SYNTHESIZE_SYSTEM = (
@ -87,6 +89,19 @@ SYNTHESIZE_PROMPT = (
    "Use sub-answers as evidence, avoid raw metric dumps unless asked."
 )
 # System prompt for the evidence-fix pass: CLUSTER_SYSTEM followed by an
 # instruction to rewrite a draft that ignored facts present in the context,
 # restricted to facts from that context.
 EVIDENCE_FIX_SYSTEM = (
    CLUSTER_SYSTEM
    + " Rewrite the draft answer if it ignored facts present in the context. "
    + "Only use facts in the provided context."
 )
 # Instruction text for the evidence-fix pass. The answer engine appends
 # "\nQuestion: <question>" and "\nDraft: <draft>" to this before sending it.
 # It asks for a rewrite only when the draft claims data is missing while the
 # context has relevant values; otherwise the draft should stay concise.
 EVIDENCE_FIX_PROMPT = (
    "Check the draft against the context. "
    "If the draft says data is missing but the context includes relevant values, "
    "rewrite the answer to include those values. "
    "If data is truly missing, keep the draft concise and honest."
 )
 DRAFT_SELECT_PROMPT = (
    "Pick the best draft for accuracy, clarity, and helpfulness. "
    "Return JSON with field: best (1-based index)."