atlasbot: improve followup hygiene

This commit is contained in:
Brad Stein 2026-02-01 11:18:06 -03:00
parent d9489f8790
commit 889e814b59
2 changed files with 75 additions and 2 deletions

View File

@ -450,6 +450,8 @@ class AnswerEngine:
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
reply = _metric_fact_guard(reply, metric_facts, keyword_tokens)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup")
scores = await self._score_answer(normalized, reply, plan, call_llm)
claims = await self._extract_claims(normalized, reply, summary, call_llm)
except LLMLimitReached:
@ -577,6 +579,18 @@ class AnswerEngine:
claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
return claims
async def _dedup_reply(
    self,
    reply: str,
    plan: ModePlan,
    call_llm: Callable[..., Any],
    tag: str,
) -> str:
    """Collapse repeated sentences/paragraphs in *reply* via the fast model.

    Returns the reply unchanged when no duplication is detected; otherwise
    sends the draft through one LLM rewrite pass and returns the result.
    """
    if _needs_dedup(reply):
        # Attach the draft to the dedup instructions and let the cheaper
        # fast model do the rewrite.
        prompt = "\nDraft: ".join([prompts.DEDUP_PROMPT, reply])
        return await call_llm(
            prompts.DEDUP_SYSTEM,
            prompt,
            model=plan.fast_model,
            tag=tag,
        )
    return reply
async def _answer_followup(
self,
question: str,
@ -600,7 +614,39 @@ class AnswerEngine:
evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
evidence_ctx = "\n".join(evidence_lines)
prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
return await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
allowed_nodes = _allowed_nodes(summary)
allowed_namespaces = _allowed_namespaces(summary)
unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
extra_bits = []
if unknown_nodes:
extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
if unknown_namespaces:
extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
if allowed_nodes:
extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
if allowed_namespaces:
extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
if extra_bits:
fix_prompt = (
prompts.EVIDENCE_FIX_PROMPT
+ "\nQuestion: "
+ question
+ "\nDraft: "
+ reply
+ "\n"
+ "\n".join(extra_bits)
)
reply = await call_llm(
prompts.EVIDENCE_FIX_SYSTEM,
fix_prompt,
context="Evidence:\n" + evidence_ctx,
model=plan.model,
tag="followup_fix",
)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
return reply
async def _select_claims(
self,
@ -1032,6 +1078,21 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
return False
def _needs_dedup(reply: str) -> bool:
if not reply:
return False
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\\s+", reply) if s.strip()]
if len(sentences) < 3:
return False
seen = set()
for sent in sentences:
norm = re.sub(r"\\s+", " ", sent.lower())
if norm in seen:
return True
seen.add(norm)
return False
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
if not reply:
return False

View File

@ -106,7 +106,9 @@ EVIDENCE_FIX_PROMPT = (
"If MustUseFacts are provided, you must incorporate them into the answer. "
"If AllowedRunbooks are provided, use an exact path from that list when answering "
"documentation or checklist questions and do not invent new paths. "
"If ResolvedRunbook is provided, you must include that exact path and must not say it is missing."
"If ResolvedRunbook is provided, you must include that exact path and must not say it is missing. "
"If AllowedNodes are provided, remove or correct any node names not in the list. "
"If AllowedNamespaces are provided, remove or correct any namespaces not in the list. "
)
RUNBOOK_ENFORCE_SYSTEM = (
@ -195,6 +197,16 @@ FOLLOWUP_PROMPT = (
"Be conversational and concise, and avoid restating all metrics."
)
# System prompt for dedup passes: the shared cluster persona plus a
# directive to strip repeats without losing any unique facts.
DEDUP_SYSTEM = (
    CLUSTER_SYSTEM
    + " Remove repeated sentences or paragraphs"
    + " without dropping unique facts."
)
# User prompt prepended to a drafted answer when asking the model to
# collapse duplicated sentences/paragraphs.
DEDUP_PROMPT = (
    "Rewrite the draft to remove repeated sentences/paragraphs while "
    "preserving facts. Return only the cleaned answer."
)
SELECT_CLAIMS_PROMPT = (
"Select relevant claim ids for the follow-up. "
"Return JSON with field: claim_ids (list)."