From e93aa6e33b7544dd71eec1e7ad92ca1ce0f36161 Mon Sep 17 00:00:00 2001
From: Brad Stein <Brad.Stein@gmail.com>
Date: Mon, 30 Mar 2026 16:55:19 -0300
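Subject: [PATCH] atlasbot: wire context and timeout fallbacks

Pass the caller-supplied context string through open_ended_answer and
open_ended_with_thinking into the fast and deep answer paths, where it is
merged after the selected fact pack before the model call. The fact-based
fallback keeps using the fact pack alone.

Route the open-ended model calls through _ollama_call_safe, which now
forwards a timeout, so a failed or timed-out call yields an empty reply
instead of raising. The thinking workers set their done event in a
finally block so it is set even when the answer call raises.

Add mode tests for context injection, per-mode timeout budgets, and the
model-timeout path.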

---
 services/comms/scripts/atlasbot/bot.py        | 84 ++++++++++++-------
 .../scripts/tests/test_atlasbot_modes.py      | 57 +++++++++++++
 2 files changed, 111 insertions(+), 30 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 84d771fe..b287463f 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -2715,6 +2715,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
     return combined
 
 
+def _merge_context_blocks(*blocks: str) -> str:
+    """Join the stripped, non-blank string blocks with one blank line between them."""
+    return "\n\n".join(text.strip() for text in blocks if isinstance(text, str) and text.strip())
+
+
 class ThoughtState:
     def __init__(self, total_steps: int = 0):
         self._lock = threading.Lock()
@@ -3072,6 +3077,7 @@ def _ollama_call_safe(
     fallback: str,
     system_override: str | None = None,
     model: str | None = None,
+    timeout: float | None = None,
 ) -> str:
     try:
         return _ollama_call(
@@ -3081,6 +3087,7 @@ def _ollama_call_safe(
             use_history=False,
             system_override=system_override,
             model=model,
+            timeout=timeout,
         )
     except Exception:
         return fallback
@@ -4226,6 +4233,7 @@ def _open_ended_fast_single(
     prompt: str,
     *,
     context: str,
+    fallback_context: str | None = None,
     history_lines: list[str] | None = None,
     state: ThoughtState | None = None,
     model: str,
@@ -4233,26 +4241,26 @@ def _open_ended_fast_single(
     if state:
         state.update("drafting", step=1, note="summarizing")
     working_context = _append_history_context(context, history_lines or []) if history_lines else context
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_fast", "atlasbot_fast"),
         prompt,
         context=working_context,
-        use_history=False,
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec("fast"),
     )
     if not _has_body_lines(reply):
-        reply = _ollama_call(
+        reply = _ollama_call_safe(
             ("atlasbot_fast", "atlasbot_fast"),
             prompt + " Provide one clear sentence before the score lines.",
             context=working_context,
-            use_history=False,
+            fallback="",
             system_override=_open_ended_system(),
             model=model,
             timeout=_mode_ollama_timeout_sec("fast"),
         )
-    fallback = _fallback_fact_answer(prompt, context)
+    fallback = _fallback_fact_answer(prompt, fallback_context or context)
     if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
         reply = fallback
     if not _has_body_lines(reply):
@@ -4269,6 +4277,7 @@ def _open_ended_fast(
     fact_lines: list[str],
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     model = _model_for_mode("fast")
@@ -4289,6 +4298,7 @@ def _open_ended_fast(
     selected_pack = _fact_pack_text(selected_lines, selected_meta)
     if _needs_full_fact_pack(prompt) or not selected_lines:
         selected_pack = fact_pack
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and _needs_full_fact_pack(prompt):
         fallback = _fallback_fact_answer(prompt, fact_pack)
         if fallback:
@@ -4297,7 +4307,8 @@ def _open_ended_fast(
         state.total_steps = _open_ended_total_steps("fast")
     return _open_ended_fast_single(
         prompt,
-        context=selected_pack,
+        context=model_context,
+        fallback_context=selected_pack,
         history_lines=history_lines,
         state=state,
         model=model,
@@ -4312,6 +4323,7 @@ def _open_ended_deep(
     fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     mode: str,
+    extra_context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     normalized = _normalize_mode(mode)
@@ -4336,17 +4348,18 @@ def _open_ended_deep(
     if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
         selected_pack = fact_pack
     fallback = _fallback_fact_answer(prompt, selected_pack)
+    model_context = _merge_context_blocks(selected_pack, extra_context)
     if not subjective and fallback:
         if state:
             state.update("done", step=_open_ended_total_steps(normalized))
         return _ensure_scores(fallback)
     if state:
         state.update("drafting", step=1, note="synthesizing")
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
         ("atlasbot_deep", "atlasbot_deep"),
         prompt,
-        context=_append_history_context(selected_pack, history_lines),
-        use_history=False,
+        context=_append_history_context(model_context, history_lines),
+        fallback="",
         system_override=_open_ended_system(),
         model=model,
         timeout=_mode_ollama_timeout_sec(normalized),
@@ -4369,6 +4382,7 @@ def open_ended_answer(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
     state: ThoughtState | None = None,
 ) -> str:
     lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@@ -4393,6 +4407,7 @@ def open_ended_answer(
             fact_lines=lines,
             fact_meta=fact_meta,
             history_lines=history_lines,
+            extra_context=context,
             state=state,
         )
     return _open_ended_deep(
@@ -4401,6 +4416,7 @@ def open_ended_answer(
         fact_lines=lines,
         fact_meta=fact_meta,
         history_lines=history_lines,
+        extra_context=context,
         mode=normalized,
         state=state,
     )
@@ -4512,6 +4528,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                 history_lines=history_lines,
                 mode=mode,
                 allow_tools=True,
+                context=context,
                 state=None,
             )
         else:
@@ -4880,16 +4897,18 @@ def ollama_reply_with_thinking(
     done = threading.Event()
 
     def worker():
-        result["reply"] = ollama_reply(
-            hist_key,
-            prompt,
-            context=context,
-            fallback=fallback,
-            use_history=use_history,
-            model=model,
-            timeout=timeout,
-        )
-        done.set()
+        try:
+            result["reply"] = ollama_reply(
+                hist_key,
+                prompt,
+                context=context,
+                fallback=fallback,
+                use_history=use_history,
+                model=model,
+                timeout=timeout,
+            )
+        finally:
+            done.set()
 
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -4921,6 +4940,7 @@ def open_ended_with_thinking(
     history_lines: list[str],
     mode: str,
     allow_tools: bool,
+    context: str = "",
 ) -> str:
     result: dict[str, str] = {"reply": ""}
     done = threading.Event()
@@ -4928,17 +4948,20 @@ def open_ended_with_thinking(
     state = ThoughtState(total_steps=total_steps)
 
     def worker():
-        result["reply"] = open_ended_answer(
-            prompt,
-            inventory=inventory,
-            snapshot=snapshot,
-            workloads=workloads,
-            history_lines=history_lines,
-            mode=mode,
-            allow_tools=allow_tools,
-            state=state,
-        )
-        done.set()
+        try:
+            result["reply"] = open_ended_answer(
+                prompt,
+                inventory=inventory,
+                snapshot=snapshot,
+                workloads=workloads,
+                history_lines=history_lines,
+                mode=mode,
+                allow_tools=allow_tools,
+                context=context,
+                state=state,
+            )
+        finally:
+            done.set()
 
     thread = threading.Thread(target=worker, daemon=True)
     thread.start()
@@ -5077,6 +5100,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
                         history_lines=history[hist_key],
                         mode=_normalize_mode(mode),
                         allow_tools=allow_tools,
+                        context=context,
                     )
                 else:
                     reply = _non_cluster_reply(
diff --git a/services/comms/scripts/tests/test_atlasbot_modes.py b/services/comms/scripts/tests/test_atlasbot_modes.py
index 9a1a8037..895b8da0 100644
--- a/services/comms/scripts/tests/test_atlasbot_modes.py
+++ b/services/comms/scripts/tests/test_atlasbot_modes.py
@@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
                 history_lines=[],
                 mode="genius",
                 allow_tools=True,
+                context='Cluster snapshot (JSON): {"injected":true}',
             )
 
         self.assertIn("The worker spread stands out", reply)
         self.assertEqual(captured["model"], "genius-model")
         self.assertLessEqual(float(captured["timeout"]), 180.0)
+        self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
+
+    def test_mode_timeouts_stay_within_budgets(self):
+        """Each mode must call its own model with a positive timeout within that mode's budget."""
+        fact_lines = ["hottest_cpu: longhorn-system (6.69)", "worker_nodes: titan-01, titan-02, titan-03"]
+        seen: list[tuple[str, float]] = []
+
+        # Stand-in for the model call: records (model, timeout) for every request it receives.
+        def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
+            seen.append((str(model), float(timeout or 0)))
+            return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
+        ):
+            for mode in ("fast", "smart", "genius"):
+                reply = self.bot.open_ended_answer(
+                    "what stands out about titan lab?",
+                    inventory=[],
+                    snapshot=None,
+                    workloads=[],
+                    history_lines=[],
+                    mode=mode,
+                    allow_tools=True,
+                )
+                self.assertIn("Confidence:", reply)
+
+        self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])
+        for (_, timeout), budget in zip(seen, (15.0, 45.0, 180.0)):
+            # A missing timeout is recorded as 0.0, which would slip under every budget unnoticed.
+            self.assertGreater(timeout, 0.0)
+            self.assertLessEqual(timeout, budget)
+
+    def test_llm_timeout_still_returns_a_conclusion(self):
+        """A TimeoutError from the model call must still yield a reply with a Confidence: line."""
+        facts = ["worker_nodes: titan-01, titan-02, titan-03", "hottest_cpu: longhorn-system (6.69)"]
+        timed_out = TimeoutError("simulated timeout")
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=facts),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=timed_out),
+        ):
+            reply = self.bot.open_ended_answer(
+                "what stands out about the worker nodes?",
+                inventory=[],
+                snapshot=None,
+                workloads=[],
+                history_lines=[],
+                mode="genius",
+                allow_tools=True,
+            )
+
+        # Compare case-insensitively: only the topic has to appear, not its capitalisation.
+        self.assertIn("worker nodes", reply.lower())
+        self.assertIn("Confidence:", reply)