From e93aa6e33b7544dd71eec1e7ad92ca1ce0f36161 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 30 Mar 2026 16:55:19 -0300
Subject: [PATCH] atlasbot: wire context and timeout fallbacks

---
NOTE(review): this copy of the patch had its line breaks and indentation
collapsed. The line structure below was rebuilt from the hunk headers and
the diffstat (counts match: 54 insertions / 30 deletions in bot.py, 57
insertions in the tests). Indentation of context lines is inferred from
Python structure; the nesting depth used in the _AtlasbotHandler and
sync_loop hunks is a guess. Prefer regenerating the patch from commit
e93aa6e3, or apply with `git apply --ignore-whitespace`. The author e-mail
is missing from the From: header in this copy.

 services/comms/scripts/atlasbot/bot.py   | 84 ++++++++++++-------
 .../scripts/tests/test_atlasbot_modes.py | 57 +++++++++++++
 2 files changed, 111 insertions(+), 30 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 84d771fe..b287463f 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -2715,6 +2715,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
     return combined
 
 
+def _merge_context_blocks(*blocks: str) -> str:
+    parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
+    return "\n\n".join(parts)
+
+
 class ThoughtState:
     def __init__(self, total_steps: int = 0):
         self._lock = threading.Lock()
@@ -3072,6 +3077,7 @@ def _ollama_call_safe(
     fallback: str,
     system_override: str | None = None,
     model: str | None = None,
+    timeout: float | None = None,
 ) -> str:
     try:
         return _ollama_call(
@@ -3081,6 +3087,7 @@ def _ollama_call_safe(
             use_history=False,
             system_override=system_override,
             model=model,
+            timeout=timeout,
         )
     except Exception:
         return fallback
@@ -4226,6 +4233,7 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    fallback_context: str | None = None,
     history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
@@ -4233,26 +4241,26 @@ def _open_ended_fast_single(
     if state:
         state.update("drafting", step=1, note="summarizing")
     working_context = _append_history_context(context, history_lines or []) if history_lines else context
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
         context=working_context,
-        use_history=False,
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec("fast"),
     )
     if not _has_body_lines(reply):
-        reply = _ollama_call(
+        reply = _ollama_call_safe(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
             context=working_context,
-            use_history=False,
+            fallback="",
             system_override=_open_ended_system(),
             model=model,
             timeout=_mode_ollama_timeout_sec("fast"),
         )
-    fallback = _fallback_fact_answer(prompt, context)
+    fallback = _fallback_fact_answer(prompt, fallback_context or context)
     if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
         reply = fallback
     if not _has_body_lines(reply):
@@ -4269,6 +4277,7 @@ def _open_ended_fast(
     fact_lines: list[str],
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     model = _model_for_mode("fast")
@@ -4289,6 +4298,7 @@ def _open_ended_fast(
     selected_pack = _fact_pack_text(selected_lines, selected_meta)
     if _needs_full_fact_pack(prompt) or not selected_lines:
         selected_pack = fact_pack
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and _needs_full_fact_pack(prompt):
         fallback = _fallback_fact_answer(prompt, fact_pack)
         if fallback:
@@ -4297,7 +4307,8 @@ def _open_ended_fast(
         state.total_steps = _open_ended_total_steps("fast")
     return _open_ended_fast_single(
         prompt,
-        context=selected_pack,
+        context=model_context,
+        fallback_context=selected_pack,
         history_lines=history_lines,
         state=state,
         model=model,
@@ -4312,6 +4323,7 @@ def _open_ended_deep(
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     mode: str,
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     normalized = _normalize_mode(mode)
@@ -4336,17 +4348,18 @@ def _open_ended_deep(
     if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
         selected_pack = fact_pack
     fallback = _fallback_fact_answer(prompt, selected_pack)
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and fallback:
         if state:
             state.update("done", step=_open_ended_total_steps(normalized))
         return _ensure_scores(fallback)
     if state:
         state.update("drafting", step=1, note="synthesizing")
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_deep", "atlasbot_deep"),
         prompt,
-        context=_append_history_context(selected_pack, history_lines),
-        use_history=False,
+        context=_append_history_context(model_context, history_lines),
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec(normalized),
@@ -4369,6 +4382,7 @@ def open_ended_answer(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@@ -4393,6 +4407,7 @@ def open_ended_answer(
             fact_lines=lines,
             fact_meta=fact_meta,
             history_lines=history_lines,
+            extra_context=context,
             state=state,
         )
     return _open_ended_deep(
@@ -4401,6 +4416,7 @@ def open_ended_answer(
         fact_lines=lines,
         fact_meta=fact_meta,
         history_lines=history_lines,
+        extra_context=context,
         mode=normalized,
         state=state,
     )
@@ -4512,6 +4528,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                     history_lines=history_lines,
                     mode=mode,
                     allow_tools=True,
+                    context=context,
                     state=None,
                 )
             else:
@@ -4880,16 +4897,18 @@ def ollama_reply_with_thinking(
     done = threading.Event()
 
     def worker():
-        result["reply"] = ollama_reply(
-            hist_key,
-            prompt,
-            context=context,
-            fallback=fallback,
-            use_history=use_history,
-            model=model,
-            timeout=timeout,
-        )
-        done.set()
+        try:
+            result["reply"] = ollama_reply(
+                hist_key,
+                prompt,
+                context=context,
+                fallback=fallback,
+                use_history=use_history,
+                model=model,
+                timeout=timeout,
+            )
+        finally:
+            done.set()
 
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -4921,6 +4940,7 @@ def open_ended_with_thinking(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
 ) -> str:
     result: dict[str, str] = {"reply": ""}
     done = threading.Event()
@@ -4928,17 +4948,20 @@ def open_ended_with_thinking(
     state = ThoughtState(total_steps=total_steps)
 
     def worker():
-        result["reply"] = open_ended_answer(
-            prompt,
-            inventory=inventory,
-            snapshot=snapshot,
-            workloads=workloads,
-            history_lines=history_lines,
-            mode=mode,
-            allow_tools=allow_tools,
-            state=state,
-        )
-        done.set()
+        try:
+            result["reply"] = open_ended_answer(
+                prompt,
+                inventory=inventory,
+                snapshot=snapshot,
+                workloads=workloads,
+                history_lines=history_lines,
+                mode=mode,
+                allow_tools=allow_tools,
+                context=context,
+                state=state,
+            )
+        finally:
+            done.set()
 
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -5077,6 +5100,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
                             history_lines=history[hist_key],
                             mode=_normalize_mode(mode),
                             allow_tools=allow_tools,
+                            context=context,
                         )
                     else:
                         reply = _non_cluster_reply(
diff --git a/services/comms/scripts/tests/test_atlasbot_modes.py b/services/comms/scripts/tests/test_atlasbot_modes.py
index 9a1a8037..895b8da0 100644
--- a/services/comms/scripts/tests/test_atlasbot_modes.py
+++ b/services/comms/scripts/tests/test_atlasbot_modes.py
@@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
                 history_lines=[],
                 mode="genius",
                 allow_tools=True,
+                context='Cluster snapshot (JSON): {"injected":true}',
             )
 
         self.assertIn("The worker spread stands out", reply)
         self.assertEqual(captured["model"], "genius-model")
         self.assertLessEqual(float(captured["timeout"]), 180.0)
+        self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
+
+    def test_mode_timeouts_stay_within_budgets(self):
+        fact_lines = [
+            "hottest_cpu: longhorn-system (6.69)",
+            "worker_nodes: titan-01, titan-02, titan-03",
+        ]
+        seen: list[tuple[str, float]] = []
+
+        def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
+            seen.append((str(model), float(timeout or 0)))
+            return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
+        ):
+            for mode in ("fast", "smart", "genius"):
+                reply = self.bot.open_ended_answer(
+                    "what stands out about titan lab?",
+                    inventory=[],
+                    snapshot=None,
+                    workloads=[],
+                    history_lines=[],
+                    mode=mode,
+                    allow_tools=True,
+                )
+                self.assertIn("Confidence:", reply)
+
+        self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])
+        self.assertLessEqual(seen[0][1], 15.0)
+        self.assertLessEqual(seen[1][1], 45.0)
+        self.assertLessEqual(seen[2][1], 180.0)
+
+    def test_llm_timeout_still_returns_a_conclusion(self):
+        fact_lines = [
+            "worker_nodes: titan-01, titan-02, titan-03",
+            "hottest_cpu: longhorn-system (6.69)",
+        ]
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),
+        ):
+            reply = self.bot.open_ended_answer(
+                "what stands out about the worker nodes?",
+                inventory=[],
+                snapshot=None,
+                workloads=[],
+                history_lines=[],
+                mode="genius",
+                allow_tools=True,
+            )
+
+        self.assertIn("worker nodes", reply.lower())
+        self.assertIn("Confidence:", reply)