atlasbot: wire context and timeout fallbacks

Brad Stein 2026-03-30 16:55:19 -03:00
parent a1e90f4600
commit f5dcea860e
2 changed files with 111 additions and 30 deletions
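The change routes the caller-supplied chat context into the open-ended answer path and makes every model call degrade to a deterministic fallback instead of raising. For orientation, a minimal sketch of the two patterns being wired through here; the names and signatures below are simplified stand-ins for illustration, not the functions in this diff:

import threading
import time
from typing import Callable

def call_safe(call: Callable[[], str], fallback: str) -> str:
    # Any failure in the underlying model call (including a timeout raised by
    # the HTTP client) degrades to the caller-supplied fallback instead of raising.
    try:
        return call()
    except Exception:
        return fallback

def answer_with_budget(call: Callable[[], str], budget_sec: float) -> str:
    # The worker signals completion from a finally block, so an exception in
    # the model call can no longer leave the waiting side blocked forever.
    result = {"reply": ""}
    done = threading.Event()

    def worker() -> None:
        try:
            result["reply"] = call_safe(call, fallback="")
        finally:
            done.set()

    threading.Thread(target=worker, daemon=True).start()
    done.wait(budget_sec)
    return result["reply"] or "No model reply within the budget; fall back to the fact pack."

if __name__ == "__main__":
    def slow_model() -> str:
        time.sleep(2)  # stand-in for a slow Ollama call
        return "model reply"

    print(answer_with_budget(slow_model, budget_sec=0.5))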

View File

@@ -2715,6 +2715,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
     return combined


+def _merge_context_blocks(*blocks: str) -> str:
+    parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
+    return "\n\n".join(parts)
+
+
 class ThoughtState:
     def __init__(self, total_steps: int = 0):
         self._lock = threading.Lock()
@@ -3072,6 +3077,7 @@ def _ollama_call_safe(
     fallback: str,
     system_override: str | None = None,
     model: str | None = None,
+    timeout: float | None = None,
 ) -> str:
     try:
         return _ollama_call(
@@ -3081,6 +3087,7 @@ def _ollama_call_safe(
             use_history=False,
             system_override=system_override,
             model=model,
+            timeout=timeout,
         )
     except Exception:
         return fallback
@@ -4226,6 +4233,7 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    fallback_context: str | None = None,
     history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
@@ -4233,26 +4241,26 @@ def _open_ended_fast_single(
     if state:
         state.update("drafting", step=1, note="summarizing")
     working_context = _append_history_context(context, history_lines or []) if history_lines else context
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
         context=working_context,
-        use_history=False,
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec("fast"),
     )
     if not _has_body_lines(reply):
-        reply = _ollama_call(
+        reply = _ollama_call_safe(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
             context=working_context,
-            use_history=False,
+            fallback="",
             system_override=_open_ended_system(),
             model=model,
             timeout=_mode_ollama_timeout_sec("fast"),
         )
-    fallback = _fallback_fact_answer(prompt, context)
+    fallback = _fallback_fact_answer(prompt, fallback_context or context)
     if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
         reply = fallback
     if not _has_body_lines(reply):
@@ -4269,6 +4277,7 @@ def _open_ended_fast(
     fact_lines: list[str],
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     model = _model_for_mode("fast")
@@ -4289,6 +4298,7 @@ def _open_ended_fast(
     selected_pack = _fact_pack_text(selected_lines, selected_meta)
     if _needs_full_fact_pack(prompt) or not selected_lines:
         selected_pack = fact_pack
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and _needs_full_fact_pack(prompt):
         fallback = _fallback_fact_answer(prompt, fact_pack)
         if fallback:
@@ -4297,7 +4307,8 @@ def _open_ended_fast(
         state.total_steps = _open_ended_total_steps("fast")
     return _open_ended_fast_single(
         prompt,
-        context=selected_pack,
+        context=model_context,
+        fallback_context=selected_pack,
         history_lines=history_lines,
         state=state,
         model=model,
@@ -4312,6 +4323,7 @@ def _open_ended_deep(
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     mode: str,
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     normalized = _normalize_mode(mode)
@@ -4336,17 +4348,18 @@ def _open_ended_deep(
     if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
         selected_pack = fact_pack
     fallback = _fallback_fact_answer(prompt, selected_pack)
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and fallback:
         if state:
             state.update("done", step=_open_ended_total_steps(normalized))
         return _ensure_scores(fallback)
     if state:
         state.update("drafting", step=1, note="synthesizing")
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_deep", "atlasbot_deep"),
         prompt,
-        context=_append_history_context(selected_pack, history_lines),
-        use_history=False,
+        context=_append_history_context(model_context, history_lines),
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec(normalized),
@@ -4369,6 +4382,7 @@ def open_ended_answer(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@@ -4393,6 +4407,7 @@ def open_ended_answer(
             fact_lines=lines,
             fact_meta=fact_meta,
             history_lines=history_lines,
+            extra_context=context,
             state=state,
         )
     return _open_ended_deep(
@@ -4401,6 +4416,7 @@ def open_ended_answer(
         fact_lines=lines,
         fact_meta=fact_meta,
         history_lines=history_lines,
+        extra_context=context,
         mode=normalized,
         state=state,
     )
@@ -4512,6 +4528,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                 history_lines=history_lines,
                 mode=mode,
                 allow_tools=True,
+                context=context,
                 state=None,
             )
         else:
@@ -4880,16 +4897,18 @@ def ollama_reply_with_thinking(
     done = threading.Event()

     def worker():
-        result["reply"] = ollama_reply(
-            hist_key,
-            prompt,
-            context=context,
-            fallback=fallback,
-            use_history=use_history,
-            model=model,
-            timeout=timeout,
-        )
-        done.set()
+        try:
+            result["reply"] = ollama_reply(
+                hist_key,
+                prompt,
+                context=context,
+                fallback=fallback,
+                use_history=use_history,
+                model=model,
+                timeout=timeout,
+            )
+        finally:
+            done.set()

     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -4921,6 +4940,7 @@ def open_ended_with_thinking(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
 ) -> str:
     result: dict[str, str] = {"reply": ""}
     done = threading.Event()
@@ -4928,17 +4948,20 @@ def open_ended_with_thinking(
     state = ThoughtState(total_steps=total_steps)

     def worker():
-        result["reply"] = open_ended_answer(
-            prompt,
-            inventory=inventory,
-            snapshot=snapshot,
-            workloads=workloads,
-            history_lines=history_lines,
-            mode=mode,
-            allow_tools=allow_tools,
-            state=state,
-        )
-        done.set()
+        try:
+            result["reply"] = open_ended_answer(
+                prompt,
+                inventory=inventory,
+                snapshot=snapshot,
+                workloads=workloads,
+                history_lines=history_lines,
+                mode=mode,
+                allow_tools=allow_tools,
+                context=context,
+                state=state,
+            )
+        finally:
+            done.set()

     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -5077,6 +5100,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
                 history_lines=history[hist_key],
                 mode=_normalize_mode(mode),
                 allow_tools=allow_tools,
+                context=context,
             )
         else:
             reply = _non_cluster_reply(

View File

@@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
                 history_lines=[],
                 mode="genius",
                 allow_tools=True,
+                context='Cluster snapshot (JSON): {"injected":true}',
             )
         self.assertIn("The worker spread stands out", reply)
         self.assertEqual(captured["model"], "genius-model")
         self.assertLessEqual(float(captured["timeout"]), 180.0)
+        self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
+
+    def test_mode_timeouts_stay_within_budgets(self):
+        fact_lines = [
+            "hottest_cpu: longhorn-system (6.69)",
+            "worker_nodes: titan-01, titan-02, titan-03",
+        ]
+        seen: list[tuple[str, float]] = []
+
+        def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
+            seen.append((str(model), float(timeout or 0)))
+            return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
+        ):
+            for mode in ("fast", "smart", "genius"):
+                reply = self.bot.open_ended_answer(
+                    "what stands out about titan lab?",
+                    inventory=[],
+                    snapshot=None,
+                    workloads=[],
+                    history_lines=[],
+                    mode=mode,
+                    allow_tools=True,
+                )
+                self.assertIn("Confidence:", reply)
+        self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])
+        self.assertLessEqual(seen[0][1], 15.0)
+        self.assertLessEqual(seen[1][1], 45.0)
+        self.assertLessEqual(seen[2][1], 180.0)
+
+    def test_llm_timeout_still_returns_a_conclusion(self):
+        fact_lines = [
+            "worker_nodes: titan-01, titan-02, titan-03",
+            "hottest_cpu: longhorn-system (6.69)",
+        ]
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),
+        ):
+            reply = self.bot.open_ended_answer(
+                "what stands out about the worker nodes?",
+                inventory=[],
+                snapshot=None,
+                workloads=[],
+                history_lines=[],
+                mode="genius",
+                allow_tools=True,
+            )
+        self.assertIn("worker nodes", reply.lower())
+        self.assertIn("Confidence:", reply)