atlasbot: wire context and timeout fallbacks

2026-03-30 16:55:19 -03:00 · 2026-03-30 16:55:19 -03:00 · 5977ef15f0
commit 5977ef15f0
parent 9f04bfde27
2 changed files with 111 additions and 30 deletions
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@ -2716,6 +2716,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
    return combined


+def _merge_context_blocks(*blocks: str) -> str:  # Merge context blocks into one string; yields "" when no block qualifies.
+    parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]  # keep only str blocks with non-whitespace content, trimmed
+    return "\n\n".join(parts)  # surviving blocks are separated by one blank line
+
+
 class ThoughtState:
    def __init__(self, total_steps: int = 0):
        self._lock = threading.Lock()
@ -3073,6 +3078,7 @@ def _ollama_call_safe(
    fallback: str,
    system_override: str | None = None,
    model: str | None = None,
+    timeout: float | None = None,
 ) -> str:
    try:
        return _ollama_call(
@ -3082,6 +3088,7 @@ def _ollama_call_safe(
            use_history=False,
            system_override=system_override,
            model=model,
+            timeout=timeout,
        )
    except Exception:
        return fallback
@ -4227,6 +4234,7 @@ def _open_ended_fast_single(
    prompt: str,
    *,
    context: str,
+    fallback_context: str | None = None,
    history_lines: list[str] | None = None,
    state: ThoughtState | None = None,
    model: str,
@ -4234,26 +4242,26 @@ def _open_ended_fast_single(
    if state:
        state.update("drafting", step=1, note="summarizing")
    working_context = _append_history_context(context, history_lines or []) if history_lines else context
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
        ("atlasbot_fast", "atlasbot_fast"),
        prompt,
        context=working_context,
-        use_history=False,
+        fallback="",
        system_override=_open_ended_system(),
        model=model,
        timeout=_mode_ollama_timeout_sec("fast"),
    )
    if not _has_body_lines(reply):
-        reply = _ollama_call(
+        reply = _ollama_call_safe(
            ("atlasbot_fast", "atlasbot_fast"),
            prompt + " Provide one clear sentence before the score lines.",
            context=working_context,
-            use_history=False,
+            fallback="",
            system_override=_open_ended_system(),
            model=model,
            timeout=_mode_ollama_timeout_sec("fast"),
        )
-    fallback = _fallback_fact_answer(prompt, context)
+    fallback = _fallback_fact_answer(prompt, fallback_context or context)
    if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
        reply = fallback
    if not _has_body_lines(reply):
@ -4270,6 +4278,7 @@ def _open_ended_fast(
    fact_lines: list[str],
    fact_meta: dict[str, dict[str, Any]],
    history_lines: list[str],
+    extra_context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    model = _model_for_mode("fast")
@ -4290,6 +4299,7 @@ def _open_ended_fast(
    selected_pack = _fact_pack_text(selected_lines, selected_meta)
    if _needs_full_fact_pack(prompt) or not selected_lines:
        selected_pack = fact_pack
+    model_context = _merge_context_blocks(selected_pack, extra_context)
    if not subjective and _needs_full_fact_pack(prompt):
        fallback = _fallback_fact_answer(prompt, fact_pack)
        if fallback:
@ -4298,7 +4308,8 @@ def _open_ended_fast(
        state.total_steps = _open_ended_total_steps("fast")
    return _open_ended_fast_single(
        prompt,
-        context=selected_pack,
+        context=model_context,
+        fallback_context=selected_pack,
        history_lines=history_lines,
        state=state,
        model=model,
@ -4313,6 +4324,7 @@ def _open_ended_deep(
    fact_meta: dict[str, dict[str, Any]],
    history_lines: list[str],
    mode: str,
+    extra_context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    normalized = _normalize_mode(mode)
@ -4337,17 +4349,18 @@ def _open_ended_deep(
    if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
        selected_pack = fact_pack
    fallback = _fallback_fact_answer(prompt, selected_pack)
+    model_context = _merge_context_blocks(selected_pack, extra_context)
    if not subjective and fallback:
        if state:
            state.update("done", step=_open_ended_total_steps(normalized))
        return _ensure_scores(fallback)
    if state:
        state.update("drafting", step=1, note="synthesizing")
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
        ("atlasbot_deep", "atlasbot_deep"),
        prompt,
-        context=_append_history_context(selected_pack, history_lines),
-        use_history=False,
+        context=_append_history_context(model_context, history_lines),
+        fallback="",
        system_override=_open_ended_system(),
        model=model,
        timeout=_mode_ollama_timeout_sec(normalized),
@ -4370,6 +4383,7 @@ def open_ended_answer(
    history_lines: list[str],
    mode: str,
    allow_tools: bool,
+    context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@ -4394,6 +4408,7 @@ def open_ended_answer(
            fact_lines=lines,
            fact_meta=fact_meta,
            history_lines=history_lines,
+            extra_context=context,
            state=state,
        )
    return _open_ended_deep(
@ -4402,6 +4417,7 @@ def open_ended_answer(
        fact_lines=lines,
        fact_meta=fact_meta,
        history_lines=history_lines,
+        extra_context=context,
        mode=normalized,
        state=state,
    )
@ -4513,6 +4529,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                history_lines=history_lines,
                mode=mode,
                allow_tools=True,
+                context=context,
                state=None,
            )
        else:
@ -4881,16 +4898,18 @@ def ollama_reply_with_thinking(
    done = threading.Event()

    def worker():
-        result["reply"] = ollama_reply(
-            hist_key,
-            prompt,
-            context=context,
-            fallback=fallback,
-            use_history=use_history,
-            model=model,
-            timeout=timeout,
-        )
-        done.set()
+        try:
+            result["reply"] = ollama_reply(
+                hist_key,
+                prompt,
+                context=context,
+                fallback=fallback,
+                use_history=use_history,
+                model=model,
+                timeout=timeout,
+            )
+        finally:
+            done.set()

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
@ -4922,6 +4941,7 @@ def open_ended_with_thinking(
    history_lines: list[str],
    mode: str,
    allow_tools: bool,
+    context: str = "",
 ) -> str:
    result: dict[str, str] = {"reply": ""}
    done = threading.Event()
@ -4929,17 +4949,20 @@ def open_ended_with_thinking(
    state = ThoughtState(total_steps=total_steps)

    def worker():
-        result["reply"] = open_ended_answer(
-            prompt,
-            inventory=inventory,
-            snapshot=snapshot,
-            workloads=workloads,
-            history_lines=history_lines,
-            mode=mode,
-            allow_tools=allow_tools,
-            state=state,
-        )
-        done.set()
+        try:
+            result["reply"] = open_ended_answer(
+                prompt,
+                inventory=inventory,
+                snapshot=snapshot,
+                workloads=workloads,
+                history_lines=history_lines,
+                mode=mode,
+                allow_tools=allow_tools,
+                context=context,
+                state=state,
+            )
+        finally:
+            done.set()

    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
@ -5078,6 +5101,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
                        history_lines=history[hist_key],
                        mode=_normalize_mode(mode),
                        allow_tools=allow_tools,
+                        context=context,
                    )
                else:
                    reply = _non_cluster_reply(
--- a/services/comms/scripts/tests/test_atlasbot_modes.py
+++ b/services/comms/scripts/tests/test_atlasbot_modes.py
@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
                history_lines=[],
                mode="genius",
                allow_tools=True,
+                context='Cluster snapshot (JSON): {"injected":true}',
            )

        self.assertIn("The worker spread stands out", reply)
        self.assertEqual(captured["model"], "genius-model")
        self.assertLessEqual(float(captured["timeout"]), 180.0)
+        self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
+
+    def test_mode_timeouts_stay_within_budgets(self):  # Each mode must reach _ollama_call with its own model and a timeout under that mode's ceiling.
+        fact_lines = [
+            "hottest_cpu: longhorn-system (6.69)",
+            "worker_nodes: titan-01, titan-02, titan-03",
+        ]
+        seen: list[tuple[str, float]] = []  # (model, timeout) captured from every stubbed _ollama_call
+
+        def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
+            seen.append((str(model), float(timeout or 0)))  # NOTE(review): a missing timeout is recorded as 0.0 and still passes the ceiling asserts below
+            return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
+        ):
+            for mode in ("fast", "smart", "genius"):
+                reply = self.bot.open_ended_answer(
+                    "what stands out about titan lab?",
+                    inventory=[],
+                    snapshot=None,
+                    workloads=[],
+                    history_lines=[],
+                    mode=mode,
+                    allow_tools=True,
+                )
+                self.assertIn("Confidence:", reply)
+
+        self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])  # exactly three model calls, one per mode, in loop order
+        self.assertLessEqual(seen[0][1], 15.0)  # ceiling for the "fast" call
+        self.assertLessEqual(seen[1][1], 45.0)  # ceiling for the "smart" call
+        self.assertLessEqual(seen[2][1], 180.0)  # ceiling for the "genius" call
+
+    def test_llm_timeout_still_returns_a_conclusion(self):  # When every _ollama_call raises TimeoutError, open_ended_answer must still return a usable reply.
+        fact_lines = [
+            "worker_nodes: titan-01, titan-02, titan-03",
+            "hottest_cpu: longhorn-system (6.69)",
+        ]
+
+        with (
+            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
+            mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),  # every model call fails
+        ):
+            reply = self.bot.open_ended_answer(
+                "what stands out about the worker nodes?",
+                inventory=[],
+                snapshot=None,
+                workloads=[],
+                history_lines=[],
+                mode="genius",
+                allow_tools=True,
+            )
+
+        self.assertIn("worker nodes", reply.lower())  # reply must still mention the asked-about topic (case-insensitive)
+        self.assertIn("Confidence:", reply)  # and must still carry the "Confidence:" marker