atlasbot: improve followup hygiene

This commit is contained in:
Brad Stein 2026-02-01 11:18:06 -03:00
parent d9489f8790
commit 889e814b59
2 changed files with 75 additions and 2 deletions

View File

@ -450,6 +450,8 @@ class AnswerEngine:
if classify.get("question_type") in {"metric", "diagnostic"} and metric_facts:
reply = _metric_fact_guard(reply, metric_facts, keyword_tokens)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup")
scores = await self._score_answer(normalized, reply, plan, call_llm)
claims = await self._extract_claims(normalized, reply, summary, call_llm)
except LLMLimitReached:
@ -577,6 +579,18 @@ class AnswerEngine:
claims.append(ClaimItem(id=claim_id, claim=claim_text, evidence=evidence_items))
return claims
async def _dedup_reply(
    self,
    reply: str,
    plan: ModePlan,
    call_llm: Callable[..., Any],
    tag: str,
) -> str:
    """Collapse repeated sentences/paragraphs in *reply* via the fast model.

    Returns the reply unchanged when no duplication is detected; otherwise
    sends the draft through one LLM rewrite pass and returns the result.
    """
    if _needs_dedup(reply):
        # Attach the draft to the dedup instructions and let the cheaper
        # fast model do the rewrite.
        prompt = "\nDraft: ".join([prompts.DEDUP_PROMPT, reply])
        return await call_llm(
            prompts.DEDUP_SYSTEM,
            prompt,
            model=plan.fast_model,
            tag=tag,
        )
    return reply
async def _answer_followup(
self,
question: str,
@ -600,7 +614,39 @@ class AnswerEngine:
evidence_lines.append(f"- {ev.path}: {ev.value_at_claim}{delta_note}")
evidence_ctx = "\n".join(evidence_lines)
prompt = prompts.FOLLOWUP_PROMPT + "\nFollow-up: " + question + "\nEvidence:\n" + evidence_ctx
return await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
reply = await call_llm(prompts.FOLLOWUP_SYSTEM, prompt, model=plan.model, tag="followup")
allowed_nodes = _allowed_nodes(summary)
allowed_namespaces = _allowed_namespaces(summary)
unknown_nodes = _find_unknown_nodes(reply, allowed_nodes)
unknown_namespaces = _find_unknown_namespaces(reply, allowed_namespaces)
extra_bits = []
if unknown_nodes:
extra_bits.append("UnknownNodes: " + ", ".join(sorted(unknown_nodes)))
if unknown_namespaces:
extra_bits.append("UnknownNamespaces: " + ", ".join(sorted(unknown_namespaces)))
if allowed_nodes:
extra_bits.append("AllowedNodes: " + ", ".join(allowed_nodes))
if allowed_namespaces:
extra_bits.append("AllowedNamespaces: " + ", ".join(allowed_namespaces))
if extra_bits:
fix_prompt = (
prompts.EVIDENCE_FIX_PROMPT
+ "\nQuestion: "
+ question
+ "\nDraft: "
+ reply
+ "\n"
+ "\n".join(extra_bits)
)
reply = await call_llm(
prompts.EVIDENCE_FIX_SYSTEM,
fix_prompt,
context="Evidence:\n" + evidence_ctx,
model=plan.model,
tag="followup_fix",
)
reply = await self._dedup_reply(reply, plan, call_llm, tag="dedup_followup")
return reply
async def _select_claims(
self,
@ -1032,6 +1078,21 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
return False
def _needs_dedup(reply: str) -> bool:
if not reply:
return False
sentences = [s.strip() for s in re.split(r"(?<=[.!?])\\s+", reply) if s.strip()]
if len(sentences) < 3:
return False
seen = set()
for sent in sentences:
norm = re.sub(r"\\s+", " ", sent.lower())
if norm in seen:
return True
seen.add(norm)
return False
def _needs_focus_fix(question: str, reply: str, classify: dict[str, Any]) -> bool:
if not reply:
return False

View File

@ -106,7 +106,9 @@ EVIDENCE_FIX_PROMPT = (
"If MustUseFacts are provided, you must incorporate them into the answer. "
"If AllowedRunbooks are provided, use an exact path from that list when answering "
"documentation or checklist questions and do not invent new paths. "
"If ResolvedRunbook is provided, you must include that exact path and must not say it is missing."
"If ResolvedRunbook is provided, you must include that exact path and must not say it is missing. "
"If AllowedNodes are provided, remove or correct any node names not in the list. "
"If AllowedNamespaces are provided, remove or correct any namespaces not in the list. "
)
RUNBOOK_ENFORCE_SYSTEM = (
@ -195,6 +197,16 @@ FOLLOWUP_PROMPT = (
"Be conversational and concise, and avoid restating all metrics."
)
# System prompt for dedup passes: the shared cluster persona plus a
# directive to strip repeats without losing any unique facts.
DEDUP_SYSTEM = (
    CLUSTER_SYSTEM
    + " Remove repeated sentences or paragraphs"
    + " without dropping unique facts."
)
# User prompt prepended to a drafted answer when asking the model to
# collapse duplicated sentences/paragraphs.
DEDUP_PROMPT = (
    "Rewrite the draft to remove repeated sentences/paragraphs while "
    "preserving facts. Return only the cleaned answer."
)
SELECT_CLAIMS_PROMPT = (
"Select relevant claim ids for the follow-up. "
"Return JSON with field: claim_ids (list)."