async def _dedup_reply(
    self,
    reply: str,
    plan: ModePlan,
    call_llm: Callable[..., Any],
    tag: str,
) -> str:
    """Ask the fast model to strip repeated sentences from *reply*.

    The LLM is only called when ``_needs_dedup(reply)`` is true; otherwise
    the draft is returned untouched.

    Args:
        reply: Draft answer to clean up.
        plan: Mode plan; its ``fast_model`` is passed as the ``model`` keyword.
        call_llm: Awaitable LLM entry point, called as
            ``call_llm(system, prompt, model=..., tag=...)``.
        tag: Label forwarded to ``call_llm`` to identify this call.

    Returns:
        The rewritten answer, or the original *reply* when no rewrite was
        needed or the rewrite came back empty or was not a string.
    """
    if not _needs_dedup(reply):
        return reply
    dedup_prompt = prompts.DEDUP_PROMPT + "\nDraft: " + reply
    cleaned = await call_llm(
        prompts.DEDUP_SYSTEM,
        dedup_prompt,
        model=plan.fast_model,
        tag=tag,
    )
    # call_llm is typed as returning Any: never let an empty or malformed
    # rewrite replace a draft that already carries the facts.
    if not isinstance(cleaned, str) or not cleaned.strip():
        return reply
    return cleaned
extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes)) + if allowed_namespaces: + extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces)) + if extra_bits: + fix_prompt = ( + prompts.EVIDENCE_FIX_PROMPT + + "\nQuestion: " + + question + + "\nDraft: " + + reply + + "\n" + + "\n".join(extra_bits) + ) + reply = await call_llm( + prompts.EVIDENCE_FIX_SYSTEM, + fix_prompt, + context="Evidence:\n" + evidence_ctx, + model=plan.model, + tag="followup_fix", + ) + reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup") + return reply async def _select_claims( self, @@ -1032,6 +1078,21 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool: return False +def _needs_dedup(reply: str) -> bool: + if not reply: + return False + sentences = [s.strip() for s in re.split(r"(?<=[.!?])\\s+", reply) if s.strip()] + if len(sentences) < 3: + return False + seen = set() + for sent in sentences: + norm = re.sub(r"\\s+", " ", sent.lower()) + if norm in seen: + return True + seen.add(norm) + return False + + def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool: if not reply: return False diff --git a/atlasbot/llm/prompts.py b/atlasbot/llm/prompts.py index f95d991..37691ea 100644 --- a/atlasbot/llm/prompts.py +++ b/atlasbot/llm/prompts.py @@ -106,7 +106,9 @@ EVIDENCE_FIX_PROMPT = ( "If MustUseFacts are provided, you must incorporate them into the answer. " "If AllowedRunbooks are provided, use an exact path from that list when answering " "documentation or checklist questions and do not invent new paths. " - "If ResolvedRunbook is provided, you must include that exact path and must not say it is missing." + "If ResolvedRunbook is provided, you must include that exact path and must not say it is missing. " + "If AllowedNodes are provided, remove or correct any node names not in the list. " + "If AllowedNamespaces are provided, remove or correct any namespaces not in the list. 
" ) RUNBOOK_ENFORCE_SYSTEM = ( @@ -195,6 +197,16 @@ FOLLOWUP_PROMPT = ( "Be conversational and concise, and avoid restating all metrics." ) +DEDUP_SYSTEM = ( + CLUSTER_SYSTEM + + " Remove repeated sentences or paragraphs without dropping unique facts." +) + +DEDUP_PROMPT = ( + "Rewrite the draft to remove repeated sentences/paragraphs while preserving facts. " + "Return only the cleaned answer." +) + SELECT_CLAIMS_PROMPT = ( "Select relevant claim ids for the follow-up. " "Return JSON with field: claim_ids (list)."