atlasbot: stabilize open-ended responses
This commit is contained in:
parent
92bc8b642d
commit
6def6c167c
@ -764,7 +764,7 @@ class AnswerEngine:
|
|||||||
if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
|
if not metric_facts or not _has_keyword_overlap(metric_facts, keyword_tokens):
|
||||||
best_line = _best_keyword_line(summary_lines, keyword_tokens)
|
best_line = _best_keyword_line(summary_lines, keyword_tokens)
|
||||||
if best_line:
|
if best_line:
|
||||||
reply = f"From the latest snapshot: {best_line}."
|
reply = f"Latest metrics: {best_line}."
|
||||||
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
|
if (classify.get("question_type") in {"metric", "diagnostic"} or force_metric) and metric_facts:
|
||||||
best_line = None
|
best_line = None
|
||||||
lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
|
lowered_keywords = [kw.lower() for kw in keyword_tokens if kw]
|
||||||
@ -777,7 +777,20 @@ class AnswerEngine:
|
|||||||
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
|
reply_numbers = set(re.findall(r"\d+(?:\.\d+)?", reply))
|
||||||
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
|
fact_numbers = set(re.findall(r"\d+(?:\.\d+)?", " ".join(metric_facts)))
|
||||||
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
|
if not reply_numbers or (fact_numbers and not (reply_numbers & fact_numbers)):
|
||||||
reply = f"From the latest snapshot: {best_line}."
|
reply = f"Latest metrics: {best_line}."
|
||||||
|
|
||||||
|
if _should_use_insight_guard(classify):
|
||||||
|
if observer:
|
||||||
|
observer("insight_guard", "checking for concrete signals")
|
||||||
|
reply = await _apply_insight_guard(
|
||||||
|
normalized,
|
||||||
|
reply,
|
||||||
|
classify,
|
||||||
|
context,
|
||||||
|
plan,
|
||||||
|
call_llm,
|
||||||
|
metric_facts or key_facts,
|
||||||
|
)
|
||||||
|
|
||||||
if plan.use_critic:
|
if plan.use_critic:
|
||||||
if observer:
|
if observer:
|
||||||
@ -823,6 +836,9 @@ class AnswerEngine:
|
|||||||
extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}},
|
extra={"extra": {"mode": mode, "seconds": elapsed, "llm_calls": call_count, "limit": call_cap, "limit_hit": limit_hit}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if limit_hit and "run limitless" not in reply.lower():
|
||||||
|
reply = reply.rstrip() + "\n\nNote: I hit my reasoning limit. Ask again with 'Run limitless' for a deeper pass."
|
||||||
|
|
||||||
if conversation_id and claims:
|
if conversation_id and claims:
|
||||||
self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
|
self._store_state(conversation_id, claims, summary, snapshot_used, pin_snapshot)
|
||||||
|
|
||||||
@ -2425,6 +2441,46 @@ def _needs_evidence_fix(reply: str, classify: dict[str, Any]) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def _should_use_insight_guard(classify: dict[str, Any]) -> bool:
|
||||||
|
style = (classify.get("answer_style") or "").strip().lower()
|
||||||
|
qtype = (classify.get("question_type") or "").strip().lower()
|
||||||
|
return style == "insightful" or qtype in {"open_ended", "planning"}
|
||||||
|
|
||||||
|
|
||||||
|
async def _apply_insight_guard(
    question: str,
    reply: str,
    classify: dict[str, Any],
    context: str,
    plan: ModePlan,
    call_llm: Callable[..., Awaitable[str]],
    facts: list[str],
) -> str:
    """Check an open-ended reply for concrete signals and rewrite it if needed.

    Two model calls may be made: a check on ``plan.fast_model`` using the
    insight-guard prompts, and, unless that check reports ``ok`` as exactly
    ``True``, a rewrite on ``plan.model`` using the insight-fix prompts.

    Args:
        question: Question text substituted into both prompts.
        reply: Draft answer to check; returned unchanged when empty.
        classify: Classification mapping passed to
            ``_should_use_insight_guard``.
        context: Forwarded to ``call_llm`` as the ``context`` keyword.
        plan: Supplies ``fast_model`` for the check and ``model`` for the
            rewrite.
        call_llm: Awaitable invoked as
            ``call_llm(system, prompt, context=..., model=..., tag=...)``.
        facts: Optional fact lines; at most the first six are appended to
            the rewrite prompt.

    Returns:
        The original reply when the guard does not apply, the check passes,
        or the rewrite comes back empty; otherwise the rewritten answer.
    """
    if not reply or not _should_use_insight_guard(classify):
        return reply

    check_prompt = prompts.INSIGHT_GUARD_PROMPT.format(question=question, answer=reply)
    check_raw = await call_llm(
        prompts.INSIGHT_GUARD_SYSTEM,
        check_prompt,
        context=context,
        model=plan.fast_model,
        tag="insight_guard",
    )
    verdict = _parse_json_block(check_raw, fallback={})
    # Only an explicit boolean true passes. A payload that is not a mapping
    # (for example a JSON list) is treated as "not ok" instead of crashing
    # on .get().
    if isinstance(verdict, dict) and verdict.get("ok") is True:
        return reply

    rewrite_prompt = prompts.INSIGHT_FIX_PROMPT.format(question=question, answer=reply)
    if facts:
        # Cap the appended facts at six lines, as the prompt is built per reply.
        rewrite_prompt += "\nFacts:\n" + "\n".join(facts[:6])
    rewritten = await call_llm(
        prompts.INSIGHT_FIX_SYSTEM,
        rewrite_prompt,
        context=context,
        model=plan.model,
        tag="insight_fix",
    )
    # Never trade a non-empty draft for an empty or blank rewrite.
    if not isinstance(rewritten, str) or not rewritten.strip():
        return reply
    return rewritten
|
||||||
|
|
||||||
|
|
||||||
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
|
def _reply_matches_metric_facts(reply: str, metric_facts: list[str], tokens: list[str] | set[str] | None = None) -> bool:
|
||||||
if not reply or not metric_facts:
|
if not reply or not metric_facts:
|
||||||
return True
|
return True
|
||||||
|
|||||||
@ -134,7 +134,8 @@ SYNTHESIZE_SYSTEM = (
|
|||||||
SYNTHESIZE_PROMPT = (
|
SYNTHESIZE_PROMPT = (
|
||||||
"Write a final response to the user. "
|
"Write a final response to the user. "
|
||||||
"Use sub-answers as evidence, avoid raw metric dumps unless asked. "
|
"Use sub-answers as evidence, avoid raw metric dumps unless asked. "
|
||||||
"If Style is insightful or the question is open-ended, choose 1-2 salient points and explain why they stand out. "
|
"If Style is insightful or the question is open-ended, choose at least 2 salient points (or 1 if only one exists) "
|
||||||
|
"and explain why they stand out. "
|
||||||
"If Style is direct, answer concisely with the specific value requested."
|
"If Style is direct, answer concisely with the specific value requested."
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -173,6 +174,35 @@ EVIDENCE_GUARD_PROMPT = (
|
|||||||
"Return the corrected answer only."
|
"Return the corrected answer only."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# System prompt for the insight check: the shared cluster preamble followed by
# the instruction to verify that an open-ended answer has two concrete signals.
INSIGHT_GUARD_SYSTEM = CLUSTER_SYSTEM + (
    " Check if an open-ended answer includes at least two concrete signals. "
    "Return JSON only."
)

# User prompt for the insight check. Filled with str.format(question=..., answer=...);
# asks for a JSON object with the fields "ok" and "reason".
INSIGHT_GUARD_PROMPT = (
    "Question: {question}\n"
    + "Answer: {answer}\n\n"
    + "Return JSON with fields: ok (bool), reason (string). "
    + "ok=true only if the answer includes at least two concrete signals with what+where+evidence "
    + "(metric name + value or explicit condition)."
)

# System prompt for the rewrite step: the shared cluster preamble followed by
# the instruction to ground the answer in at least two concrete signals.
INSIGHT_FIX_SYSTEM = CLUSTER_SYSTEM + (
    " Rewrite the answer to be insightful and grounded for open-ended questions. "
    "Include at least two concrete signals with what+where+evidence. "
    "If nothing stands out, say so explicitly, then mention two most notable normal signals with evidence. "
    "Return the answer only."
)

# User prompt for the rewrite step. Filled with str.format(question=..., answer=...).
INSIGHT_FIX_PROMPT = (
    "Question: {question}\n"
    + "Draft: {answer}\n"
    + "If Facts are provided, use them. "
    + "Avoid saying 'based on the snapshot' or 'based on the context'."
)
|
||||||
|
|
||||||
RUNBOOK_ENFORCE_SYSTEM = (
|
RUNBOOK_ENFORCE_SYSTEM = (
|
||||||
CLUSTER_SYSTEM
|
CLUSTER_SYSTEM
|
||||||
+ " Ensure the answer includes the required runbook path. "
|
+ " Ensure the answer includes the required runbook path. "
|
||||||
|
|||||||
@ -146,14 +146,27 @@ class MatrixBot:
|
|||||||
latest["note"] = note
|
latest["note"] = note
|
||||||
|
|
||||||
async def heartbeat() -> None:
|
async def heartbeat() -> None:
|
||||||
|
last_note = ""
|
||||||
|
last_sent = 0.0
|
||||||
while not stop.is_set():
|
while not stop.is_set():
|
||||||
await asyncio.sleep(self._settings.thinking_interval_sec)
|
await asyncio.sleep(self._settings.thinking_interval_sec)
|
||||||
if stop.is_set():
|
if stop.is_set():
|
||||||
break
|
break
|
||||||
note = (latest.get("note") or "thinking").strip()
|
note = (latest.get("note") or latest.get("stage") or "thinking").strip()
|
||||||
snippet = note[:32]
|
if not note:
|
||||||
msg = f"Still thinking ({snippet})…"
|
note = "thinking"
|
||||||
|
snippet = note[:64]
|
||||||
|
now = time.monotonic()
|
||||||
|
should_send = False
|
||||||
|
if snippet and snippet != last_note:
|
||||||
|
should_send = True
|
||||||
|
elif now - last_sent >= max(60.0, self._settings.thinking_interval_sec * 2):
|
||||||
|
should_send = True
|
||||||
|
if should_send:
|
||||||
|
msg = f"Still thinking — {snippet}…"
|
||||||
await self._client.send_message(token, room_id, msg)
|
await self._client.send_message(token, room_id, msg)
|
||||||
|
last_note = snippet
|
||||||
|
last_sent = now
|
||||||
|
|
||||||
task = asyncio.create_task(heartbeat())
|
task = asyncio.create_task(heartbeat())
|
||||||
started = time.monotonic()
|
started = time.monotonic()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user