atlasbot: wire context and timeout fallbacks
This commit is contained in:
parent
9f04bfde27
commit
5977ef15f0
@ -2716,6 +2716,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
|
||||
return combined
|
||||
|
||||
|
||||
def _merge_context_blocks(*blocks: str) -> str:
|
||||
parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
|
||||
return "\n\n".join(parts)
|
||||
|
||||
|
||||
class ThoughtState:
|
||||
def __init__(self, total_steps: int = 0):
|
||||
self._lock = threading.Lock()
|
||||
@ -3073,6 +3078,7 @@ def _ollama_call_safe(
|
||||
fallback: str,
|
||||
system_override: str | None = None,
|
||||
model: str | None = None,
|
||||
timeout: float | None = None,
|
||||
) -> str:
|
||||
try:
|
||||
return _ollama_call(
|
||||
@ -3082,6 +3088,7 @@ def _ollama_call_safe(
|
||||
use_history=False,
|
||||
system_override=system_override,
|
||||
model=model,
|
||||
timeout=timeout,
|
||||
)
|
||||
except Exception:
|
||||
return fallback
|
||||
@ -4227,6 +4234,7 @@ def _open_ended_fast_single(
|
||||
prompt: str,
|
||||
*,
|
||||
context: str,
|
||||
fallback_context: str | None = None,
|
||||
history_lines: list[str] | None = None,
|
||||
state: ThoughtState | None = None,
|
||||
model: str,
|
||||
@ -4234,26 +4242,26 @@ def _open_ended_fast_single(
|
||||
if state:
|
||||
state.update("drafting", step=1, note="summarizing")
|
||||
working_context = _append_history_context(context, history_lines or []) if history_lines else context
|
||||
reply = _ollama_call(
|
||||
reply = _ollama_call_safe(
|
||||
("atlasbot_fast", "atlasbot_fast"),
|
||||
prompt,
|
||||
context=working_context,
|
||||
use_history=False,
|
||||
fallback="",
|
||||
system_override=_open_ended_system(),
|
||||
model=model,
|
||||
timeout=_mode_ollama_timeout_sec("fast"),
|
||||
)
|
||||
if not _has_body_lines(reply):
|
||||
reply = _ollama_call(
|
||||
reply = _ollama_call_safe(
|
||||
("atlasbot_fast", "atlasbot_fast"),
|
||||
prompt + " Provide one clear sentence before the score lines.",
|
||||
context=working_context,
|
||||
use_history=False,
|
||||
fallback="",
|
||||
system_override=_open_ended_system(),
|
||||
model=model,
|
||||
timeout=_mode_ollama_timeout_sec("fast"),
|
||||
)
|
||||
fallback = _fallback_fact_answer(prompt, context)
|
||||
fallback = _fallback_fact_answer(prompt, fallback_context or context)
|
||||
if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
|
||||
reply = fallback
|
||||
if not _has_body_lines(reply):
|
||||
@ -4270,6 +4278,7 @@ def _open_ended_fast(
|
||||
fact_lines: list[str],
|
||||
fact_meta: dict[str, dict[str, Any]],
|
||||
history_lines: list[str],
|
||||
extra_context: str = "",
|
||||
state: ThoughtState | None = None,
|
||||
) -> str:
|
||||
model = _model_for_mode("fast")
|
||||
@ -4290,6 +4299,7 @@ def _open_ended_fast(
|
||||
selected_pack = _fact_pack_text(selected_lines, selected_meta)
|
||||
if _needs_full_fact_pack(prompt) or not selected_lines:
|
||||
selected_pack = fact_pack
|
||||
model_context = _merge_context_blocks(selected_pack, extra_context)
|
||||
if not subjective and _needs_full_fact_pack(prompt):
|
||||
fallback = _fallback_fact_answer(prompt, fact_pack)
|
||||
if fallback:
|
||||
@ -4298,7 +4308,8 @@ def _open_ended_fast(
|
||||
state.total_steps = _open_ended_total_steps("fast")
|
||||
return _open_ended_fast_single(
|
||||
prompt,
|
||||
context=selected_pack,
|
||||
context=model_context,
|
||||
fallback_context=selected_pack,
|
||||
history_lines=history_lines,
|
||||
state=state,
|
||||
model=model,
|
||||
@ -4313,6 +4324,7 @@ def _open_ended_deep(
|
||||
fact_meta: dict[str, dict[str, Any]],
|
||||
history_lines: list[str],
|
||||
mode: str,
|
||||
extra_context: str = "",
|
||||
state: ThoughtState | None = None,
|
||||
) -> str:
|
||||
normalized = _normalize_mode(mode)
|
||||
@ -4337,17 +4349,18 @@ def _open_ended_deep(
|
||||
if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
|
||||
selected_pack = fact_pack
|
||||
fallback = _fallback_fact_answer(prompt, selected_pack)
|
||||
model_context = _merge_context_blocks(selected_pack, extra_context)
|
||||
if not subjective and fallback:
|
||||
if state:
|
||||
state.update("done", step=_open_ended_total_steps(normalized))
|
||||
return _ensure_scores(fallback)
|
||||
if state:
|
||||
state.update("drafting", step=1, note="synthesizing")
|
||||
reply = _ollama_call(
|
||||
reply = _ollama_call_safe(
|
||||
("atlasbot_deep", "atlasbot_deep"),
|
||||
prompt,
|
||||
context=_append_history_context(selected_pack, history_lines),
|
||||
use_history=False,
|
||||
context=_append_history_context(model_context, history_lines),
|
||||
fallback="",
|
||||
system_override=_open_ended_system(),
|
||||
model=model,
|
||||
timeout=_mode_ollama_timeout_sec(normalized),
|
||||
@ -4370,6 +4383,7 @@ def open_ended_answer(
|
||||
history_lines: list[str],
|
||||
mode: str,
|
||||
allow_tools: bool,
|
||||
context: str = "",
|
||||
state: ThoughtState | None = None,
|
||||
) -> str:
|
||||
lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
|
||||
@ -4394,6 +4408,7 @@ def open_ended_answer(
|
||||
fact_lines=lines,
|
||||
fact_meta=fact_meta,
|
||||
history_lines=history_lines,
|
||||
extra_context=context,
|
||||
state=state,
|
||||
)
|
||||
return _open_ended_deep(
|
||||
@ -4402,6 +4417,7 @@ def open_ended_answer(
|
||||
fact_lines=lines,
|
||||
fact_meta=fact_meta,
|
||||
history_lines=history_lines,
|
||||
extra_context=context,
|
||||
mode=normalized,
|
||||
state=state,
|
||||
)
|
||||
@ -4513,6 +4529,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
|
||||
history_lines=history_lines,
|
||||
mode=mode,
|
||||
allow_tools=True,
|
||||
context=context,
|
||||
state=None,
|
||||
)
|
||||
else:
|
||||
@ -4881,16 +4898,18 @@ def ollama_reply_with_thinking(
|
||||
done = threading.Event()
|
||||
|
||||
def worker():
|
||||
result["reply"] = ollama_reply(
|
||||
hist_key,
|
||||
prompt,
|
||||
context=context,
|
||||
fallback=fallback,
|
||||
use_history=use_history,
|
||||
model=model,
|
||||
timeout=timeout,
|
||||
)
|
||||
done.set()
|
||||
try:
|
||||
result["reply"] = ollama_reply(
|
||||
hist_key,
|
||||
prompt,
|
||||
context=context,
|
||||
fallback=fallback,
|
||||
use_history=use_history,
|
||||
model=model,
|
||||
timeout=timeout,
|
||||
)
|
||||
finally:
|
||||
done.set()
|
||||
|
||||
thread = threading.Thread(target=worker, daemon=True)
|
||||
thread.start()
|
||||
@ -4922,6 +4941,7 @@ def open_ended_with_thinking(
|
||||
history_lines: list[str],
|
||||
mode: str,
|
||||
allow_tools: bool,
|
||||
context: str = "",
|
||||
) -> str:
|
||||
result: dict[str, str] = {"reply": ""}
|
||||
done = threading.Event()
|
||||
@ -4929,17 +4949,20 @@ def open_ended_with_thinking(
|
||||
state = ThoughtState(total_steps=total_steps)
|
||||
|
||||
def worker():
|
||||
result["reply"] = open_ended_answer(
|
||||
prompt,
|
||||
inventory=inventory,
|
||||
snapshot=snapshot,
|
||||
workloads=workloads,
|
||||
history_lines=history_lines,
|
||||
mode=mode,
|
||||
allow_tools=allow_tools,
|
||||
state=state,
|
||||
)
|
||||
done.set()
|
||||
try:
|
||||
result["reply"] = open_ended_answer(
|
||||
prompt,
|
||||
inventory=inventory,
|
||||
snapshot=snapshot,
|
||||
workloads=workloads,
|
||||
history_lines=history_lines,
|
||||
mode=mode,
|
||||
allow_tools=allow_tools,
|
||||
context=context,
|
||||
state=state,
|
||||
)
|
||||
finally:
|
||||
done.set()
|
||||
|
||||
thread = threading.Thread(target=worker, daemon=True)
|
||||
thread.start()
|
||||
@ -5078,6 +5101,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
|
||||
history_lines=history[hist_key],
|
||||
mode=_normalize_mode(mode),
|
||||
allow_tools=allow_tools,
|
||||
context=context,
|
||||
)
|
||||
else:
|
||||
reply = _non_cluster_reply(
|
||||
|
||||
@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
|
||||
history_lines=[],
|
||||
mode="genius",
|
||||
allow_tools=True,
|
||||
context='Cluster snapshot (JSON): {"injected":true}',
|
||||
)
|
||||
|
||||
self.assertIn("The worker spread stands out", reply)
|
||||
self.assertEqual(captured["model"], "genius-model")
|
||||
self.assertLessEqual(float(captured["timeout"]), 180.0)
|
||||
self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
|
||||
|
||||
def test_mode_timeouts_stay_within_budgets(self):
    """Each mode must call its own model with a timeout inside its budget.

    `_fact_pack_lines` and `_ollama_call` are patched on `self.bot`, so no
    real model is contacted. The fake call records the `model` and
    `timeout` keyword arguments it receives for later inspection.
    """
    fact_lines = [
        "hottest_cpu: longhorn-system (6.69)",
        "worker_nodes: titan-01, titan-02, titan-03",
    ]
    # Upper bound, in seconds, on the timeout passed to the model per mode.
    # Dict order is also the order in which the modes are exercised.
    budgets = {"fast": 15.0, "smart": 45.0, "genius": 180.0}
    seen: list[tuple[str, float]] = []

    def fake_ollama_call(
        hist_key,
        prompt,
        *,
        context,
        use_history=True,
        system_override=None,
        model=None,
        timeout=None,
    ):
        # A missing timeout is recorded as 0 so float() never sees None.
        seen.append((str(model), float(timeout or 0)))
        return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"

    with (
        mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
        mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call),
    ):
        for mode in budgets:
            reply = self.bot.open_ended_answer(
                "what stands out about titan lab?",
                inventory=[],
                snapshot=None,
                workloads=[],
                history_lines=[],
                mode=mode,
                allow_tools=True,
            )
            self.assertIn("Confidence:", reply)

    # Exactly one recorded call per mode, each with that mode's model.
    self.assertEqual([model for model, _ in seen], ["fast-model", "smart-model", "genius-model"])
    for (mode, budget), (_, timeout) in zip(budgets.items(), seen):
        with self.subTest(mode=mode):
            self.assertLessEqual(timeout, budget)
|
||||
|
||||
def test_llm_timeout_still_returns_a_conclusion(self):
    """A timed-out model call must still produce a scored answer.

    `_ollama_call` is patched to raise `TimeoutError` on every call, so
    whatever `open_ended_answer` returns cannot come from the model.
    """
    fact_lines = [
        "worker_nodes: titan-01, titan-02, titan-03",
        "hottest_cpu: longhorn-system (6.69)",
    ]

    with (
        mock.patch.object(self.bot, "_fact_pack_lines", return_value=fact_lines),
        mock.patch.object(self.bot, "_ollama_call", side_effect=TimeoutError("simulated timeout")),
    ):
        reply = self.bot.open_ended_answer(
            "what stands out about the worker nodes?",
            inventory=[],
            snapshot=None,
            workloads=[],
            history_lines=[],
            mode="genius",
            allow_tools=True,
        )

    # The reply must still address the question and carry a confidence line.
    self.assertIn("worker nodes", reply.lower())
    self.assertIn("Confidence:", reply)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user