atlasbot: stabilize open-ended responses

This commit is contained in:
Brad Stein 2026-02-04 22:15:12 -03:00
parent 92bc8b642d
commit 6def6c167c
3 changed files with 106 additions and 7 deletions

View File

@ -764,7 +764,7 @@ class AnswerEngine:
if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens): if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
best_line = _best_keyword_line(summary_lines, keyword_tokens) best_line = _best_keyword_line(summary_lines, keyword_tokens)
if best_line: if best_line:
reply = f"From the latest snapshot: {best_line}." reply = f"Latest metrics: {best_line}."
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts: if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
best_line = None best_line = None
lowered_keywords = [kw.lower() for kw in keyword_tokens if kw] lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
@ -777,7 +777,20 @@ class AnswerEngine:
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply)) reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts))) fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)): if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
reply = f"From the latest snapshot: {best_line}." reply = f"Latest metrics: {best_line}."
if _should_use_insight_guard(classify):
if observer:
observer("insight_guard", "checking for concrete signals")
reply = await _apply_insight_guard(
normalized,
reply,
classify,
context,
plan,
call_llm,
metric_facts or key_facts,
)
if plan.use_critic: if plan.use_critic:
if observer: if observer:
@ -823,6 +836,9 @@ class AnswerEngine:
extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}}, extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}},
) )
if limit_hit and "run limitless" not in reply.lower():
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
if conversation_id and claims: if conversation_id and claims:
self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot) self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
@ -2425,6 +2441,46 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
return False return False
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
style = (classify.get("answer_style") or "").strip().lower()
qtype = (classify.get("question_type") or "").strip().lower()
return style == "insightful" or qtype in {"open_ended", "planning"}
async def _apply_insight_guard(
    question: str,
    reply: str,
    classify: dict[str, Any],
    context: str,
    plan: ModePlan,
    call_llm: Callable[..., Awaitable[str]],
    facts: list[str],
) -> str:
    """Check that an open-ended reply carries concrete signals; rewrite if not.

    Runs a cheap verdict call on the fast model first; only when that
    verdict fails is the more expensive fix model invoked to rewrite the
    draft, optionally grounded in up to six supporting facts.
    """
    if not reply:
        return reply
    if not _should_use_insight_guard(classify):
        return reply

    verdict_raw = await call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        prompts.INSIGHT_GUARD_PROMPT.format(question=question, answer=reply),
        context=context,
        model=plan.fast_model,
        tag="insight_guard",
    )
    verdict = _parse_json_block(verdict_raw, fallback={})
    if verdict.get("ok") is True:
        # Guard approved the draft as-is; no rewrite needed.
        return reply

    rewrite_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=question, answer=reply)
    if facts:
        # Ground the rewrite in at most six supporting facts.
        rewrite_prompt += "\nFacts:\n" + "\n".join(facts[:6])
    return await call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        rewrite_prompt,
        context=context,
        model=plan.model,
        tag="insight_fix",
    )
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool: def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
if not reply or not metric_facts: if not reply or not metric_facts:
return True return True

View File

@ -134,7 +134,8 @@ SYNTHESIZE_SYSTEM = (
SYNTHESIZE_PROMPT = ( SYNTHESIZE_PROMPT = (
"Write a final response to the user. " "Write a final response to the user. "
"Use sub-answers as evidence, avoid raw metric dumps unless asked. " "Use sub-answers as evidence, avoid raw metric dumps unless asked. "
"If Style is insightful or the question is open-ended, choose 1-2 salient points and explain why they stand out. " "If Style is insightful or the question is open-ended, choose at least 2 salient points (or 1 if only one exists) "
"and explain why they stand out. "
"If Style is direct, answer concisely with the specific value requested." "If Style is direct, answer concisely with the specific value requested."
) )
@ -173,6 +174,35 @@ EVIDENCE_GUARD_PROMPT = (
"Return the corrected answer only." "Return the corrected answer only."
) )
# System prompt for the insight-guard verdict call: a cheap check that an
# open-ended answer names at least two concrete signals.  Builds on the
# shared CLUSTER_SYSTEM preamble.
INSIGHT_GUARD_SYSTEM = (
    CLUSTER_SYSTEM
    + " Check if an open-ended answer includes at least two concrete signals. "
    + "Return JSON only."
)

# User prompt for the guard call; the model is expected to return
# {"ok": bool, "reason": str}.  Placeholders: {question}, {answer}.
INSIGHT_GUARD_PROMPT = (
    "Question: {question}\n"
    "Answer: {answer}\n\n"
    "Return JSON with fields: ok (bool), reason (string). "
    "ok=true only if the answer includes at least two concrete signals with what+where+evidence "
    "(metric name + value or explicit condition)."
)

# System prompt for the rewrite pass, used only when the guard verdict fails.
INSIGHT_FIX_SYSTEM = (
    CLUSTER_SYSTEM
    + " Rewrite the answer to be insightful and grounded for open-ended questions. "
    + "Include at least two concrete signals with what+where+evidence. "
    + "If nothing stands out, say so explicitly, then mention two most notable normal signals with evidence. "
    + "Return the answer only."
)

# User prompt for the rewrite; the caller may append a "Facts:" section with
# supporting evidence.  Placeholders: {question}, {answer}.
INSIGHT_FIX_PROMPT = (
    "Question: {question}\n"
    "Draft: {answer}\n"
    "If Facts are provided, use them. "
    "Avoid saying 'based on the snapshot' or 'based on the context'."
)
RUNBOOK_ENFORCE_SYSTEM = ( RUNBOOK_ENFORCE_SYSTEM = (
CLUSTER_SYSTEM CLUSTER_SYSTEM
+ " Ensure the answer includes the required runbook path. " + " Ensure the answer includes the required runbook path. "

View File

@ -146,14 +146,27 @@ class MatrixBot:
latest["note"] = note latest["note"] = note
async def heartbeat() -> None: async def heartbeat() -> None:
last_note = ""
last_sent = 0.0
while not stop.is_set(): while not stop.is_set():
await asyncio.sleep(self._settings.thinking_interval_sec) await asyncio.sleep(self._settings.thinking_interval_sec)
if stop.is_set(): if stop.is_set():
break break
note = (latest.get("note") or "thinking").strip() note = (latest.get("note") or latest.get("stage") or "thinking").strip()
snippet = note[:32] if not note:
msg = f"Still thinking ({snippet})…" note = "thinking"
snippet = note[:64]
now = time.monotonic()
should_send = False
if snippet and snippet != last_note:
should_send = True
elif now - last_sent >= max(60.0, self._settings.thinking_interval_sec * 2):
should_send = True
if should_send:
msg = f"Still thinking — {snippet}"
await self._client.send_message(token, room_id, msg) await self._client.send_message(token, room_id, msg)
last_note = snippet
last_sent = now
task = asyncio.create_task(heartbeat()) task = asyncio.create_task(heartbeat())
started = time.monotonic() started = time.monotonic()