atlasbot: stabilize open-ended responses

This commit is contained in:
Brad Stein 2026-02-04 22:15:12 -03:00
parent 92bc8b642d
commit 6def6c167c
3 changed files with 106 additions and 7 deletions

View File

@ -764,7 +764,7 @@ class AnswerEngine:
if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens): if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
best_line = _best_keyword_line(summary_lines, keyword_tokens) best_line = _best_keyword_line(summary_lines, keyword_tokens)
if best_line: if best_line:
reply = f"From the latest snapshot: {best_line}." reply = f"Latest metrics: {best_line}."
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts: if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
best_line = None best_line = None
lowered_keywords = [kw.lower() for kw in keyword_tokens if kw] lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
@ -777,7 +777,20 @@ class AnswerEngine:
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply)) reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts))) fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)): if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
reply = f"From the latest snapshot: {best_line}." reply = f"Latest metrics: {best_line}."
if _should_use_insight_guard(classify):
if observer:
observer("insight_guard", "checking for concrete signals")
reply = await _apply_insight_guard(
normalized,
reply,
classify,
context,
plan,
call_llm,
metric_facts or key_facts,
)
if plan.use_critic: if plan.use_critic:
if observer: if observer:
@ -823,6 +836,9 @@ class AnswerEngine:
extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}}, extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}},
) )
if limit_hit and "run limitless" not in reply.lower():
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
if conversation_id and claims: if conversation_id and claims:
self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot) self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
@ -2425,6 +2441,46 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
return False return False
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
style = (classify.get("answer_style") or "").strip().lower()
qtype = (classify.get("question_type") or "").strip().lower()
return style == "insightful" or qtype in {"open_ended", "planning"}
async def _apply_insight_guard(
    question: str,
    reply: str,
    classify: dict[str, Any],
    context: str,
    plan: ModePlan,
    call_llm: Callable[..., Awaitable[str]],
    facts: list[str],
) -> str:
    """Check that an open-ended reply carries concrete signals; rewrite if not.

    Runs a cheap verdict call on the fast model first; only when that
    verdict fails is the more expensive fix model invoked to rewrite the
    draft, optionally grounded in up to six supporting facts.
    """
    if not reply:
        return reply
    if not _should_use_insight_guard(classify):
        return reply

    verdict_raw = await call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        prompts.INSIGHT_GUARD_PROMPT.format(question=question, answer=reply),
        context=context,
        model=plan.fast_model,
        tag="insight_guard",
    )
    verdict = _parse_json_block(verdict_raw, fallback={})
    if verdict.get("ok") is True:
        # Guard approved the draft as-is; no rewrite needed.
        return reply

    rewrite_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=question, answer=reply)
    if facts:
        # Ground the rewrite in at most six supporting facts.
        rewrite_prompt += "\nFacts:\n" + "\n".join(facts[:6])
    return await call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        rewrite_prompt,
        context=context,
        model=plan.model,
        tag="insight_fix",
    )
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool: def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
if not reply or not metric_facts: if not reply or not metric_facts:
return True return True

View File

@ -134,7 +134,8 @@ SYNTHESIZE_SYSTEM = (
SYNTHESIZE_PROMPT = ( SYNTHESIZE_PROMPT = (
"Write a final response to the user. " "Write a final response to the user. "
"Use sub-answers as evidence, avoid raw metric dumps unless asked. " "Use sub-answers as evidence, avoid raw metric dumps unless asked. "
"If Style is insightful or the question is open-ended, choose 1-2 salient points and explain why they stand out. " "If Style is insightful or the question is open-ended, choose at least 2 salient points (or 1 if only one exists) "
"and explain why they stand out. "
"If Style is direct, answer concisely with the specific value requested." "If Style is direct, answer concisely with the specific value requested."
) )
@ -173,6 +174,35 @@ EVIDENCE_GUARD_PROMPT = (
"Return the corrected answer only." "Return the corrected answer only."
) )
# System prompt for the insight-guard verdict call: a cheap check that an
# open-ended answer names at least two concrete signals.  Builds on the
# shared CLUSTER_SYSTEM preamble.
INSIGHT_GUARD_SYSTEM = (
    CLUSTER_SYSTEM
    + " Check if an open-ended answer includes at least two concrete signals. "
    + "Return JSON only."
)

# User prompt for the guard call; the model is expected to return
# {"ok": bool, "reason": str}.  Placeholders: {question}, {answer}.
INSIGHT_GUARD_PROMPT = (
    "Question: {question}\n"
    "Answer: {answer}\n\n"
    "Return JSON with fields: ok (bool), reason (string). "
    "ok=true only if the answer includes at least two concrete signals with what+where+evidence "
    "(metric name + value or explicit condition)."
)

# System prompt for the rewrite pass, used only when the guard verdict fails.
INSIGHT_FIX_SYSTEM = (
    CLUSTER_SYSTEM
    + " Rewrite the answer to be insightful and grounded for open-ended questions. "
    + "Include at least two concrete signals with what+where+evidence. "
    + "If nothing stands out, say so explicitly, then mention two most notable normal signals with evidence. "
    + "Return the answer only."
)

# User prompt for the rewrite; the caller may append a "Facts:" section with
# supporting evidence.  Placeholders: {question}, {answer}.
INSIGHT_FIX_PROMPT = (
    "Question: {question}\n"
    "Draft: {answer}\n"
    "If Facts are provided, use them. "
    "Avoid saying 'based on the snapshot' or 'based on the context'."
)
RUNBOOK_ENFORCE_SYSTEM = ( RUNBOOK_ENFORCE_SYSTEM = (
CLUSTER_SYSTEM CLUSTER_SYSTEM
+ " Ensure the answer includes the required runbook path. " + " Ensure the answer includes the required runbook path. "

View File

@ -146,14 +146,27 @@ class MatrixBot:
latest["note"] = note latest["note"] = note
async def heartbeat() -> None: async def heartbeat() -> None:
last_note = ""
last_sent = 0.0
while not stop.is_set(): while not stop.is_set():
await asyncio.sleep(self._settings.thinking_interval_sec) await asyncio.sleep(self._settings.thinking_interval_sec)
if stop.is_set(): if stop.is_set():
break break
note = (latest.get("note") or "thinking").strip() note = (latest.get("note") or latest.get("stage") or "thinking").strip()
snippet = note[:32] if not note:
msg = f"Still thinking ({snippet})…" note = "thinking"
snippet = note[:64]
now = time.monotonic()
should_send = False
if snippet and snippet != last_note:
should_send = True
elif now - last_sent >= max(60.0, self._settings.thinking_interval_sec * 2):
should_send = True
if should_send:
msg = f"Still thinking — {snippet}"
await self._client.send_message(token, room_id, msg) await self._client.send_message(token, room_id, msg)
last_note = snippet
last_sent = now
task = asyncio.create_task(heartbeat()) task = asyncio.create_task(heartbeat())
started = time.monotonic() started = time.monotonic()