atlasbot: wire context and timeout fallbacks

2026-03-30 16:55:19 -03:00 · 2026-03-30 16:55:19 -03:00 · f5dcea860e
commit f5dcea860e
parent a1e90f4600
2 changed files with 111 additions and 30 deletions
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@ -2715,6 +2715,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
    return combined
 def _merge_context_blocks(*blocks: str) -> str:
    parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
    return "\n\n".join(parts)
 class ThoughtState:
    def __init__(self, total_steps: int = 0):
        self._lock = threading.Lock()
@ -3072,6 +3077,7 @@ def _ollama_call_safe(
    fallback: str,
    system_override: str | None = None,
    model: str | None = None,
    timeout: float | None = None,
 ) -> str:
    try:
        return _ollama_call(
@ -3081,6 +3087,7 @@ def _ollama_call_safe(
            use_history=False,
            system_override=system_override,
            model=model,
            timeout=timeout,
        )
    except Exception:
        return fallback
@ -4226,6 +4233,7 @@ def _open_ended_fast_single(
    prompt: str,
    *,
    context: str,
    fallback_context: str | None = None,
    history_lines: list[str] | None = None,
    state: ThoughtState | None = None,
    model: str,
@ -4233,26 +4241,26 @@ def _open_ended_fast_single(
    if state:
        state.update("drafting", step=1, note="summarizing")
    working_context = _append_history_context(context, history_lines or []) if history_lines else context
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
        ("atlasbot_fast", "atlasbot_fast"),
        prompt,
        context=working_context,
-        use_history=False,
+        fallback="",
        system_override=_open_ended_system(),
        model=model,
        timeout=_mode_ollama_timeout_sec("fast"),
    )
    if not _has_body_lines(reply):
-        reply = _ollama_call(
+        reply = _ollama_call_safe(
            ("atlasbot_fast", "atlasbot_fast"),
            prompt + " Provide one clear sentence before the score lines.",
            context=working_context,
-            use_history=False,
+            fallback="",
            system_override=_open_ended_system(),
            model=model,
            timeout=_mode_ollama_timeout_sec("fast"),
        )
-    fallback = _fallback_fact_answer(prompt, context)
+    fallback = _fallback_fact_answer(prompt, fallback_context or context)
    if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
        reply = fallback
    if not _has_body_lines(reply):
@ -4269,6 +4277,7 @@ def _open_ended_fast(
    fact_lines: list[str],
    fact_meta: dict[str, dict[str, Any]],
    history_lines: list[str],
    extra_context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    model = _model_for_mode("fast")
@ -4289,6 +4298,7 @@ def _open_ended_fast(
    selected_pack = _fact_pack_text(selected_lines, selected_meta)
    if _needs_full_fact_pack(prompt) or not selected_lines:
        selected_pack = fact_pack
    model_context = _merge_context_blocks(selected_pack, extra_context)
    if not subjective and _needs_full_fact_pack(prompt):
        fallback = _fallback_fact_answer(prompt, fact_pack)
        if fallback:
@ -4297,7 +4307,8 @@ def _open_ended_fast(
        state.total_steps = _open_ended_total_steps("fast")
    return _open_ended_fast_single(
        prompt,
-        context=selected_pack,
+        context=model_context,
        fallback_context=selected_pack,
        history_lines=history_lines,
        state=state,
        model=model,
@ -4312,6 +4323,7 @@ def _open_ended_deep(
    fact_meta: dict[str, dict[str, Any]],
    history_lines: list[str],
    mode: str,
    extra_context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    normalized = _normalize_mode(mode)
@ -4336,17 +4348,18 @@ def _open_ended_deep(
    if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
        selected_pack = fact_pack
    fallback = _fallback_fact_answer(prompt, selected_pack)
    model_context = _merge_context_blocks(selected_pack, extra_context)
    if not subjective and fallback:
        if state:
            state.update("done", step=_open_ended_total_steps(normalized))
        return _ensure_scores(fallback)
    if state:
        state.update("drafting", step=1, note="synthesizing")
-    reply = _ollama_call(
+    reply = _ollama_call_safe(
        ("atlasbot_deep", "atlasbot_deep"),
        prompt,
-        context=_append_history_context(selected_pack, history_lines),
+        context=_append_history_context(model_context, history_lines),
-        use_history=False,
+        fallback="",
        system_override=_open_ended_system(),
        model=model,
        timeout=_mode_ollama_timeout_sec(normalized),
@ -4369,6 +4382,7 @@ def open_ended_answer(
    history_lines: list[str],
    mode: str,
    allow_tools: bool,
    context: str = "",
    state: ThoughtState | None = None,
 ) -> str:
    lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@ -4393,6 +4407,7 @@ def open_ended_answer(
            fact_lines=lines,
            fact_meta=fact_meta,
            history_lines=history_lines,
            extra_context=context,
            state=state,
        )
    return _open_ended_deep(
@ -4401,6 +4416,7 @@ def open_ended_answer(
        fact_lines=lines,
        fact_meta=fact_meta,
        history_lines=history_lines,
        extra_context=context,
        mode=normalized,
        state=state,
    )
@ -4512,6 +4528,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                history_lines=history_lines,
                mode=mode,
                allow_tools=True,
                context=context,
                state=None,
            )
        else:
@ -4880,16 +4897,18 @@ def ollama_reply_with_thinking(
    done = threading.Event()
    def worker():
-        result["reply"] = ollama_reply(
+        try:
-            hist_key,
+            result["reply"] = ollama_reply(
-            prompt,
+                hist_key,
-            context=context,
+                prompt,
-            fallback=fallback,
+                context=context,
-            use_history=use_history,
+                fallback=fallback,
-            model=model,
+                use_history=use_history,
-            timeout=timeout,
+                model=model,
-        )
+                timeout=timeout,
-        done.set()
+            )
        finally:
            done.set()
    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
@ -4921,6 +4940,7 @@ def open_ended_with_thinking(
    history_lines: list[str],
    mode: str,
    allow_tools: bool,
    context: str = "",
 ) -> str:
    result: dict[str, str] = {"reply": ""}
    done = threading.Event()
@ -4928,17 +4948,20 @@ def open_ended_with_thinking(
    state = ThoughtState(total_steps=total_steps)
    def worker():
-        result["reply"] = open_ended_answer(
+        try:
-            prompt,
+            result["reply"] = open_ended_answer(
-            inventory=inventory,
+                prompt,
-            snapshot=snapshot,
+                inventory=inventory,
-            workloads=workloads,
+                snapshot=snapshot,
-            history_lines=history_lines,
+                workloads=workloads,
-            mode=mode,
+                history_lines=history_lines,
-            allow_tools=allow_tools,
+                mode=mode,
-            state=state,
+                allow_tools=allow_tools,
-        )
+                context=context,
-        done.set()
+                state=state,
            )
        finally:
            done.set()
    thread = threading.Thread(target=worker, daemon=True)
    thread.start()
@ -5077,6 +5100,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
                        history_lines=history[hist_key],
                        mode=_normalize_mode(mode),
                        allow_tools=allow_tools,
                        context=context,
                    )
                else:
                    reply = _non_cluster_reply(
--- a/services/comms/scripts/tests/test_atlasbot_modes.py
+++ b/services/comms/scripts/tests/test_atlasbot_modes.py
@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
                history_lines=[],
                mode="genius",
                allow_tools=True,
                context='Cluster snapshot (JSON): {"injected":true}',
            )
        self.assertIn("The worker spread stands out", reply)
        self.assertEqual(captured["model"], "genius-model")
        self.assertLessEqual(float(captured["timeout"]), 180.0)
        self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
    def test_mode_timeouts_stay_within_budgets(self):
        """Check the model and timeout passed to `_ollama_call` for each mode.

        `open_ended_answer` is run once per mode ("fast", "smart", "genius")
        with `_fact_pack_lines` and `_ollama_call` patched. The stub records
        the `model` and `timeout` keyword arguments of every call so they can
        be compared against the per-mode ceilings asserted at the end.
        """
        fact_lines = [
            "hottest_cpu: longhorn-system (6.69)",
            "worker_nodes: titan-01, titan-02, titan-03",
        ]
        # (model, timeout) pairs in call order; a missing timeout is recorded as 0.0.
        seen: list[tuple[str, float]] = []
        def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
            # Stand-in for the real LLM call: record the arguments and return a canned reply.
            seen.append((str(model), float(timeout or 0)))
            return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"
        with (
            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
            mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
        ):
            for mode in ("fast", "smart", "genius"):
                reply = self.bot.open_ended_answer(
                    "what stands out about titan lab?",
                    inventory=[],
                    snapshot=None,
                    workloads=[],
                    history_lines=[],
                    mode=mode,
                    allow_tools=True,
                )
                self.assertIn("Confidence:", reply)
        # The equality check also requires exactly one recorded call per mode, in loop order.
        # NOTE(review): the "*-model" names presumably come from test setup outside this view — confirm.
        self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])
        # Upper bounds, in seconds, on the timeout passed for fast / smart / genius respectively.
        self.assertLessEqual(seen[0][1], 15.0)
        self.assertLessEqual(seen[1][1], 45.0)
        self.assertLessEqual(seen[2][1], 180.0)
    def test_llm_timeout_still_returns_a_conclusion(self):
        """Check that a reply is still produced when every LLM call times out.

        `_ollama_call` is patched to raise `TimeoutError` on each invocation,
        and `open_ended_answer` is run in "genius" mode. The returned reply
        must mention the worker nodes and contain a "Confidence:" marker.
        """
        fact_lines = [
            "worker_nodes: titan-01, titan-02, titan-03",
            "hottest_cpu: longhorn-system (6.69)",
        ]
        with (
            mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
            # Every call to the patched function raises, simulating an unresponsive model.
            mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),
        ):
            reply = self.bot.open_ended_answer(
                "what stands out about the worker nodes?",
                inventory=[],
                snapshot=None,
                workloads=[],
                history_lines=[],
                mode="genius",
                allow_tools=True,
            )
        # Case-insensitive check that the reply addresses the subject of the question.
        self.assertIn("worker nodes", reply.lower())
        self.assertIn("Confidence:", reply)