diff --git a/atlasbot/engine/answerer.py b/atlasbot/engine/answerer.py index d57ba38..4ea70b8 100644 --- a/atlasbot/engine/answerer.py +++ b/atlasbot/engine/answerer.py @@ -764,7 +764,7 @@ class AnswerEngine: if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens): best_line = _best_keyword_line(summary_lines, keyword_tokens) if best_line: - reply = f"From the latest snapshot: {best_line}." + reply = f"Latest metrics: {best_line}." if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts: best_line = None lowered_keywords = [kw.lower() for kw in keyword_tokens if kw] @@ -777,7 +777,20 @@ class AnswerEngine: reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply)) fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts))) if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)): - reply = f"From the latest snapshot: {best_line}." + reply = f"Latest metrics: {best_line}." + + if _should_use_insight_guard(classify): + if observer: + observer("insight_guard", "checking for concrete signals") + reply = await _apply_insight_guard( + normalized, + reply, + classify, + context, + plan, + call_llm, + metric_facts or key_facts, + ) if plan.use_critic: if observer: @@ -823,6 +836,9 @@ class AnswerEngine: extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}}, ) + if limit_hit and "run limitless" not in reply.lower(): + reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass." + if conversation_id and claims: self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot) @@ -2425,6 +2441,46 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool: return False +def _should_use_insight_guard(classify: dict[str, Any]) -> bool: + style = (classify.get("answer_style") or "").strip().lower() + qtype = (classify.get("question_type") or "").strip().lower() + return style == "insightful" or qtype in {"open_ended", "planning"} + + +async def _apply_insight_guard( + question: str, + reply: str, + classify: dict[str, Any], + context: str, + plan: ModePlan, + call_llm: Callable[..., Awaitable[str]], + facts: list[str], +) -> str: + if not reply or not _should_use_insight_guard(classify): + return reply + guard_prompt = prompts.INSIGHT_GUARD_PROMPT.format(question=question, answer=reply) + guard_raw = await call_llm( + prompts.INSIGHT_GUARD_SYSTEM, + guard_prompt, + context=context, + model=plan.fast_model, + tag="insight_guard", + ) + guard = _parse_json_block(guard_raw, fallback={}) + if guard.get("ok") is True: + return reply + fix_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=question, answer=reply) + if facts: + fix_prompt = fix_prompt + "\nFacts:\n" + "\n".join(facts[:6]) + return await call_llm( + prompts.INSIGHT_FIX_SYSTEM, + fix_prompt, + context=context, + model=plan.model, + tag="insight_fix", + ) + + def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool: if not reply or not metric_facts: return True diff --git a/atlasbot/llm/prompts.py b/atlasbot/llm/prompts.py index fb52118..844071e 100644 --- a/atlasbot/llm/prompts.py +++ b/atlasbot/llm/prompts.py @@ -134,7 +134,8 @@ SYNTHESIZE_SYSTEM = ( SYNTHESIZE_PROMPT = ( "Write a final response to the user. " "Use sub-answers as evidence, avoid raw metric dumps unless asked. " - "If Style is insightful or the question is open-ended, choose 1-2 salient points and explain why they stand out. " + "If Style is insightful or the question is open-ended, choose at least 2 salient points (or 1 if only one exists) " + "and explain why they stand out. " "If Style is direct, answer concisely with the specific value requested." ) @@ -173,6 +174,35 @@ EVIDENCE_GUARD_PROMPT = ( "Return the corrected answer only." ) +INSIGHT_GUARD_SYSTEM = ( + CLUSTER_SYSTEM + + " Check if an open-ended answer includes at least two concrete signals. " + + "Return JSON only." +) + +INSIGHT_GUARD_PROMPT = ( + "Question: {question}\n" + "Answer: {answer}\n\n" + "Return JSON with fields: ok (bool), reason (string). " + "ok=true only if the answer includes at least two concrete signals with what+where+evidence " + "(metric name + value or explicit condition)." +) + +INSIGHT_FIX_SYSTEM = ( + CLUSTER_SYSTEM + + " Rewrite the answer to be insightful and grounded for open-ended questions. " + + "Include at least two concrete signals with what+where+evidence. " + + "If nothing stands out, say so explicitly, then mention two most notable normal signals with evidence. " + + "Return the answer only." +) + +INSIGHT_FIX_PROMPT = ( + "Question: {question}\n" + "Draft: {answer}\n" + "If Facts are provided, use them. " + "Avoid saying 'based on the snapshot' or 'based on the context'." +) + RUNBOOK_ENFORCE_SYSTEM = ( CLUSTER_SYSTEM + " Ensure the answer includes the required runbook path. " diff --git a/atlasbot/matrix/bot.py b/atlasbot/matrix/bot.py index 07147cb..0a6159e 100644 --- a/atlasbot/matrix/bot.py +++ b/atlasbot/matrix/bot.py @@ -146,14 +146,27 @@ class MatrixBot: latest["note"] = note async def heartbeat() -> None: + last_note = "" + last_sent = 0.0 while not stop.is_set(): await asyncio.sleep(self._settings.thinking_interval_sec) if stop.is_set(): break - note = (latest.get("note") or "thinking").strip() - snippet = note[:32] - msg = f"Still thinking ({snippet})…" - await self._client.send_message(token, room_id, msg) + note = (latest.get("note") or latest.get("stage") or "thinking").strip() + if not note: + note = "thinking" + snippet = note[:64] + now = time.monotonic() + should_send = False + if snippet and snippet != last_note: + should_send = True + elif now - last_sent >= max(60.0, self._settings.thinking_interval_sec * 2): + should_send = True + if should_send: + msg = f"Still thinking — {snippet}…" + await self._client.send_message(token, room_id, msg) + last_note = snippet + last_sent = now task = asyncio.create_task(heartbeat()) started = time.monotonic()