atlasbot: wire context and timeout fallbacks

This commit is contained in:
Brad Stein 2026-03-30 16:55:19 -03:00
parent 9f04bfde27
commit 5977ef15f0
2 changed files with 111 additions and 30 deletions

View File

@ -2716,6 +2716,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
return combined
def _merge_context_blocks(*blocks: str) -> str:
parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
return "\n\n".join(parts)
class ThoughtState:
def __init__(self, total_steps: int = 0):
self._lock = threading.Lock()
@ -3073,6 +3078,7 @@ def _ollama_call_safe(
fallback: str,
system_override: str | None = None,
model: str | None = None,
timeout: float | None = None,
) -> str:
try:
return _ollama_call(
@ -3082,6 +3088,7 @@ def _ollama_call_safe(
use_history=False,
system_override=system_override,
model=model,
timeout=timeout,
)
except Exception:
return fallback
@ -4227,6 +4234,7 @@ def _open_ended_fast_single(
prompt: str,
*,
context: str,
fallback_context: str | None = None,
history_lines: list[str] | None = None,
state: ThoughtState | None = None,
model: str,
@ -4234,26 +4242,26 @@ def _open_ended_fast_single(
if state:
state.update("drafting", step=1, note="summarizing")
working_context = _append_history_context(context, history_lines or []) if history_lines else context
reply = _ollama_call(
reply = _ollama_call_safe(
("atlasbot_fast", "atlasbot_fast"),
prompt,
context=working_context,
use_history=False,
fallback="",
system_override=_open_ended_system(),
model=model,
timeout=_mode_ollama_timeout_sec("fast"),
)
if not _has_body_lines(reply):
reply = _ollama_call(
reply = _ollama_call_safe(
("atlasbot_fast", "atlasbot_fast"),
prompt + " Provide one clear sentence before the score lines.",
context=working_context,
use_history=False,
fallback="",
system_override=_open_ended_system(),
model=model,
timeout=_mode_ollama_timeout_sec("fast"),
)
fallback = _fallback_fact_answer(prompt, context)
fallback = _fallback_fact_answer(prompt, fallback_context or context)
if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
reply = fallback
if not _has_body_lines(reply):
@ -4270,6 +4278,7 @@ def _open_ended_fast(
fact_lines: list[str],
fact_meta: dict[str, dict[str, Any]],
history_lines: list[str],
extra_context: str = "",
state: ThoughtState | None = None,
) -> str:
model = _model_for_mode("fast")
@ -4290,6 +4299,7 @@ def _open_ended_fast(
selected_pack = _fact_pack_text(selected_lines, selected_meta)
if _needs_full_fact_pack(prompt) or not selected_lines:
selected_pack = fact_pack
model_context = _merge_context_blocks(selected_pack, extra_context)
if not subjective and _needs_full_fact_pack(prompt):
fallback = _fallback_fact_answer(prompt, fact_pack)
if fallback:
@ -4298,7 +4308,8 @@ def _open_ended_fast(
state.total_steps = _open_ended_total_steps("fast")
return _open_ended_fast_single(
prompt,
context=selected_pack,
context=model_context,
fallback_context=selected_pack,
history_lines=history_lines,
state=state,
model=model,
@ -4313,6 +4324,7 @@ def _open_ended_deep(
fact_meta: dict[str, dict[str, Any]],
history_lines: list[str],
mode: str,
extra_context: str = "",
state: ThoughtState | None = None,
) -> str:
normalized = _normalize_mode(mode)
@ -4337,17 +4349,18 @@ def _open_ended_deep(
if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
selected_pack = fact_pack
fallback = _fallback_fact_answer(prompt, selected_pack)
model_context = _merge_context_blocks(selected_pack, extra_context)
if not subjective and fallback:
if state:
state.update("done", step=_open_ended_total_steps(normalized))
return _ensure_scores(fallback)
if state:
state.update("drafting", step=1, note="synthesizing")
reply = _ollama_call(
reply = _ollama_call_safe(
("atlasbot_deep", "atlasbot_deep"),
prompt,
context=_append_history_context(selected_pack, history_lines),
use_history=False,
context=_append_history_context(model_context, history_lines),
fallback="",
system_override=_open_ended_system(),
model=model,
timeout=_mode_ollama_timeout_sec(normalized),
@ -4370,6 +4383,7 @@ def open_ended_answer(
history_lines: list[str],
mode: str,
allow_tools: bool,
context: str = "",
state: ThoughtState | None = None,
) -> str:
lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
@ -4394,6 +4408,7 @@ def open_ended_answer(
fact_lines=lines,
fact_meta=fact_meta,
history_lines=history_lines,
extra_context=context,
state=state,
)
return _open_ended_deep(
@ -4402,6 +4417,7 @@ def open_ended_answer(
fact_lines=lines,
fact_meta=fact_meta,
history_lines=history_lines,
extra_context=context,
mode=normalized,
state=state,
)
@ -4513,6 +4529,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
history_lines=history_lines,
mode=mode,
allow_tools=True,
context=context,
state=None,
)
else:
@ -4881,16 +4898,18 @@ def ollama_reply_with_thinking(
done = threading.Event()
def worker():
result["reply"] = ollama_reply(
hist_key,
prompt,
context=context,
fallback=fallback,
use_history=use_history,
model=model,
timeout=timeout,
)
done.set()
try:
result["reply"] = ollama_reply(
hist_key,
prompt,
context=context,
fallback=fallback,
use_history=use_history,
model=model,
timeout=timeout,
)
finally:
done.set()
thread = threading.Thread(target=worker, daemon=True)
thread.start()
@ -4922,6 +4941,7 @@ def open_ended_with_thinking(
history_lines: list[str],
mode: str,
allow_tools: bool,
context: str = "",
) -> str:
result: dict[str, str] = {"reply": ""}
done = threading.Event()
@ -4929,17 +4949,20 @@ def open_ended_with_thinking(
state = ThoughtState(total_steps=total_steps)
def worker():
result["reply"] = open_ended_answer(
prompt,
inventory=inventory,
snapshot=snapshot,
workloads=workloads,
history_lines=history_lines,
mode=mode,
allow_tools=allow_tools,
state=state,
)
done.set()
try:
result["reply"] = open_ended_answer(
prompt,
inventory=inventory,
snapshot=snapshot,
workloads=workloads,
history_lines=history_lines,
mode=mode,
allow_tools=allow_tools,
context=context,
state=state,
)
finally:
done.set()
thread = threading.Thread(target=worker, daemon=True)
thread.start()
@ -5078,6 +5101,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
history_lines=history[hist_key],
mode=_normalize_mode(mode),
allow_tools=allow_tools,
context=context,
)
else:
reply = _non_cluster_reply(

View File

@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
history_lines=[],
mode="genius",
allow_tools=True,
context='Cluster snapshot (JSON): {"injected":true}',
)
self.assertIn("The worker spread stands out", reply)
self.assertEqual(captured["model"], "genius-model")
self.assertLessEqual(float(captured["timeout"]), 180.0)
self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
def test_mode_timeouts_stay_within_budgets(self):
    """Each mode must route to its own model and stay under its timeout cap."""
    pack = [
        "hottest_cpu: longhorn-system (6.69)",
        "worker_nodes: titan-01, titan-02, titan-03",
    ]
    observed: list[tuple[str, float]] = []

    def stub_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
        # Record which model/timeout each mode selected, then answer canned text.
        observed.append((str(model), float(timeout or 0)))
        return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"

    with (
        mock.patch.object(self.bot, "_fact_pack_lines", return_value=pack),
        mock.patch.object(self.bot, "_ollama_call", side_effect=stub_ollama_call),
    ):
        for mode in ("fast", "smart", "genius"):
            answer = self.bot.open_ended_answer(
                "what stands out about titan lab?",
                inventory=[],
                snapshot=None,
                workloads=[],
                history_lines=[],
                mode=mode,
                allow_tools=True,
            )
            self.assertIn("Confidence:", answer)
    self.assertEqual([name for name, _ in observed], ["fast-model", "smart-model", "genius-model"])
    # Per-mode latency budgets: fast <= 15s, smart <= 45s, genius <= 180s.
    self.assertLessEqual(observed[0][1], 15.0)
    self.assertLessEqual(observed[1][1], 45.0)
    self.assertLessEqual(observed[2][1], 180.0)
def test_llm_timeout_still_returns_a_conclusion(self):
    """A simulated LLM timeout should still yield a fact-based, scored answer."""
    pack = [
        "worker_nodes: titan-01, titan-02, titan-03",
        "hottest_cpu: longhorn-system (6.69)",
    ]
    with (
        mock.patch.object(self.bot, "_fact_pack_lines", return_value=pack),
        mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),
    ):
        answer = self.bot.open_ended_answer(
            "what stands out about the worker nodes?",
            inventory=[],
            snapshot=None,
            workloads=[],
            history_lines=[],
            mode="genius",
            allow_tools=True,
        )
    # The fallback path must mention the asked-about subject and carry scores.
    self.assertIn("worker nodes", answer.lower())
    self.assertIn("Confidence:", answer)