atlasbot: improve followup hygiene

This commit is contained in:
Brad Stein 2026-02-01 11:18:06 -03:00
parent d9489f8790
commit 889e814b59
2 changed files with 75 additions and 2 deletions

View File

@ -450,6 +450,8 @@ class AnswerEngine:
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts: if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
reply = _metric_fact_guard(reply, metric_facts, keyword_tokens) reply = _metric_fact_guard(reply, metric_facts, keyword_tokens)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup")
scores = await self._score_answer(normalized, reply, plan, call_llm) scores = await self._score_answer(normalized, reply, plan, call_llm)
claims = await self._extract_claims(normalized, reply, summary, call_llm) claims = await self._extract_claims(normalized, reply, summary, call_llm)
except LLMLimitReached: except LLMLimitReached:
@ -577,6 +579,18 @@ class AnswerEngine:
claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items)) claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
return claims return claims
async def _dedup_reply(
    self,
    reply: str,
    plan: ModePlan,
    call_llm: Callable[..., Any],
    tag: str,
) -> str:
    """Collapse repeated sentences/paragraphs in *reply* via a fast-model pass.

    Returns *reply* unchanged when the cheap duplication heuristic finds
    nothing, so the extra LLM call is only paid for drafts that need it.
    """
    # Gate on the heuristic first; skip the LLM round-trip for clean drafts.
    if not _needs_dedup(reply):
        return reply
    cleanup_request = prompts.DEDUP_PROMPT + "\nDraft: " + reply
    cleaned = await call_llm(
        prompts.DEDUP_SYSTEM,
        cleanup_request,
        model=plan.fast_model,
        tag=tag,
    )
    return cleaned
async def _answer_followup( async def _answer_followup(
self, self,
question: str, question: str,
@ -600,7 +614,39 @@ class AnswerEngine:
evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}") evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
evidence_ctx = "\n".join(evidence_lines) evidence_ctx = "\n".join(evidence_lines)
prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
return await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup") reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
allowed_nodes = _allowed_nodes(summary)
allowed_namespaces = _allowed_namespaces(summary)
unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
extra_bits = []
if unknown_nodes:
extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
if unknown_namespaces:
extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
if allowed_nodes:
extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
if allowed_namespaces:
extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
if extra_bits:
fix_prompt = (
prompts.EVIDENCE_FIX_PROMPT
+ "\nQuestion: "
+ question
+ "\nDraft: "
+ reply
+ "\n"
+ "\n".join(extra_bits)
)
reply = await call_llm(
prompts.EVIDENCE_FIX_SYSTEM,
fix_prompt,
context="Evidence:\n" + evidence_ctx,
model=plan.model,
tag="followup_fix",
)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
return reply
async def _select_claims( async def _select_claims(
self, self,
@ -1032,6 +1078,21 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
return False return False
def _needs_dedup(reply: str) -> bool:
if not reply:
return False
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\\s+", reply) if s.strip()]
if len(sentences) < 3:
return False
seen = set()
for sent in sentences:
norm = re.sub(r"\\s+", " ", sent.lower())
if norm in seen:
return True
seen.add(norm)
return False
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool: def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
if not reply: if not reply:
return False return False

View File

@ -107,6 +107,8 @@ EVIDENCE_FIX_PROMPT = (
"If AllowedRunbooks are provided, use an exact path from that list when answering " "If AllowedRunbooks are provided, use an exact path from that list when answering "
"documentation or checklist questions and do not invent new paths. " "documentation or checklist questions and do not invent new paths. "
"If ResolvedRunbook is provided, you must include that exact path and must not say it is missing. " "If ResolvedRunbook is provided, you must include that exact path and must not say it is missing. "
"If AllowedNodes are provided, remove or correct any node names not in the list. "
"If AllowedNamespaces are provided, remove or correct any namespaces not in the list. "
) )
RUNBOOK_ENFORCE_SYSTEM = ( RUNBOOK_ENFORCE_SYSTEM = (
@ -195,6 +197,16 @@ FOLLOWUP_PROMPT = (
"Be conversational and concise, and avoid restating all metrics." "Be conversational and concise, and avoid restating all metrics."
) )
# System prompt for the dedup pass: reuse the cluster persona and append the
# cleanup instruction so tone stays consistent with other answer passes.
DEDUP_SYSTEM = CLUSTER_SYSTEM + (
    " Remove repeated sentences or paragraphs without dropping unique facts."
)
# User-prompt preamble for the dedup rewrite; callers append the draft text.
DEDUP_PROMPT = (
    "Rewrite the draft to remove repeated sentences/paragraphs "
    "while preserving facts. Return only the cleaned answer."
)
SELECT_CLAIMS_PROMPT = ( SELECT_CLAIMS_PROMPT = (
"Select relevant claim ids for the follow-up. " "Select relevant claim ids for the follow-up. "
"Return JSON with field: claim_ids (list)." "Return JSON with field: claim_ids (list)."