atlasbot: wire context and timeout fallbacks
This commit is contained in:
parent
a1e90f4600
commit
f5dcea860e
@ -2715,6 +2715,11 @@ def _append_history_context(context: str, history_lines: list[str]) -> str:
|
|||||||
return combined
|
return combined
|
||||||
|
|
||||||
|
|
||||||
|
def _merge_context_blocks(*blocks: str) -> str:
|
||||||
|
parts = [block.strip() for block in blocks if isinstance(block, str) and block.strip()]
|
||||||
|
return "\n\n".join(parts)
|
||||||
|
|
||||||
|
|
||||||
class ThoughtState:
|
class ThoughtState:
|
||||||
def __init__(self, total_steps: int = 0):
|
def __init__(self, total_steps: int = 0):
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
@ -3072,6 +3077,7 @@ def _ollama_call_safe(
|
|||||||
fallback: str,
|
fallback: str,
|
||||||
system_override: str | None = None,
|
system_override: str | None = None,
|
||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
|
timeout: float | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
try:
|
try:
|
||||||
return _ollama_call(
|
return _ollama_call(
|
||||||
@ -3081,6 +3087,7 @@ def _ollama_call_safe(
|
|||||||
use_history=False,
|
use_history=False,
|
||||||
system_override=system_override,
|
system_override=system_override,
|
||||||
model=model,
|
model=model,
|
||||||
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
return fallback
|
return fallback
|
||||||
@ -4226,6 +4233,7 @@ def _open_ended_fast_single(
|
|||||||
prompt: str,
|
prompt: str,
|
||||||
*,
|
*,
|
||||||
context: str,
|
context: str,
|
||||||
|
fallback_context: str | None = None,
|
||||||
history_lines: list[str] | None = None,
|
history_lines: list[str] | None = None,
|
||||||
state: ThoughtState | None = None,
|
state: ThoughtState | None = None,
|
||||||
model: str,
|
model: str,
|
||||||
@ -4233,26 +4241,26 @@ def _open_ended_fast_single(
|
|||||||
if state:
|
if state:
|
||||||
state.update("drafting", step=1, note="summarizing")
|
state.update("drafting", step=1, note="summarizing")
|
||||||
working_context = _append_history_context(context, history_lines or []) if history_lines else context
|
working_context = _append_history_context(context, history_lines or []) if history_lines else context
|
||||||
reply = _ollama_call(
|
reply = _ollama_call_safe(
|
||||||
("atlasbot_fast", "atlasbot_fast"),
|
("atlasbot_fast", "atlasbot_fast"),
|
||||||
prompt,
|
prompt,
|
||||||
context=working_context,
|
context=working_context,
|
||||||
use_history=False,
|
fallback="",
|
||||||
system_override=_open_ended_system(),
|
system_override=_open_ended_system(),
|
||||||
model=model,
|
model=model,
|
||||||
timeout=_mode_ollama_timeout_sec("fast"),
|
timeout=_mode_ollama_timeout_sec("fast"),
|
||||||
)
|
)
|
||||||
if not _has_body_lines(reply):
|
if not _has_body_lines(reply):
|
||||||
reply = _ollama_call(
|
reply = _ollama_call_safe(
|
||||||
("atlasbot_fast", "atlasbot_fast"),
|
("atlasbot_fast", "atlasbot_fast"),
|
||||||
prompt + " Provide one clear sentence before the score lines.",
|
prompt + " Provide one clear sentence before the score lines.",
|
||||||
context=working_context,
|
context=working_context,
|
||||||
use_history=False,
|
fallback="",
|
||||||
system_override=_open_ended_system(),
|
system_override=_open_ended_system(),
|
||||||
model=model,
|
model=model,
|
||||||
timeout=_mode_ollama_timeout_sec("fast"),
|
timeout=_mode_ollama_timeout_sec("fast"),
|
||||||
)
|
)
|
||||||
fallback = _fallback_fact_answer(prompt, context)
|
fallback = _fallback_fact_answer(prompt, fallback_context or context)
|
||||||
if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
|
if fallback and (_is_quantitative_prompt(prompt) or not _has_body_lines(reply)):
|
||||||
reply = fallback
|
reply = fallback
|
||||||
if not _has_body_lines(reply):
|
if not _has_body_lines(reply):
|
||||||
@ -4269,6 +4277,7 @@ def _open_ended_fast(
|
|||||||
fact_lines: list[str],
|
fact_lines: list[str],
|
||||||
fact_meta: dict[str, dict[str, Any]],
|
fact_meta: dict[str, dict[str, Any]],
|
||||||
history_lines: list[str],
|
history_lines: list[str],
|
||||||
|
extra_context: str = "",
|
||||||
state: ThoughtState | None = None,
|
state: ThoughtState | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
model = _model_for_mode("fast")
|
model = _model_for_mode("fast")
|
||||||
@ -4289,6 +4298,7 @@ def _open_ended_fast(
|
|||||||
selected_pack = _fact_pack_text(selected_lines, selected_meta)
|
selected_pack = _fact_pack_text(selected_lines, selected_meta)
|
||||||
if _needs_full_fact_pack(prompt) or not selected_lines:
|
if _needs_full_fact_pack(prompt) or not selected_lines:
|
||||||
selected_pack = fact_pack
|
selected_pack = fact_pack
|
||||||
|
model_context = _merge_context_blocks(selected_pack, extra_context)
|
||||||
if not subjective and _needs_full_fact_pack(prompt):
|
if not subjective and _needs_full_fact_pack(prompt):
|
||||||
fallback = _fallback_fact_answer(prompt, fact_pack)
|
fallback = _fallback_fact_answer(prompt, fact_pack)
|
||||||
if fallback:
|
if fallback:
|
||||||
@ -4297,7 +4307,8 @@ def _open_ended_fast(
|
|||||||
state.total_steps = _open_ended_total_steps("fast")
|
state.total_steps = _open_ended_total_steps("fast")
|
||||||
return _open_ended_fast_single(
|
return _open_ended_fast_single(
|
||||||
prompt,
|
prompt,
|
||||||
context=selected_pack,
|
context=model_context,
|
||||||
|
fallback_context=selected_pack,
|
||||||
history_lines=history_lines,
|
history_lines=history_lines,
|
||||||
state=state,
|
state=state,
|
||||||
model=model,
|
model=model,
|
||||||
@ -4312,6 +4323,7 @@ def _open_ended_deep(
|
|||||||
fact_meta: dict[str, dict[str, Any]],
|
fact_meta: dict[str, dict[str, Any]],
|
||||||
history_lines: list[str],
|
history_lines: list[str],
|
||||||
mode: str,
|
mode: str,
|
||||||
|
extra_context: str = "",
|
||||||
state: ThoughtState | None = None,
|
state: ThoughtState | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
normalized = _normalize_mode(mode)
|
normalized = _normalize_mode(mode)
|
||||||
@ -4336,17 +4348,18 @@ def _open_ended_deep(
|
|||||||
if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
|
if _needs_full_fact_pack(prompt) or not selected_lines or normalized == "genius":
|
||||||
selected_pack = fact_pack
|
selected_pack = fact_pack
|
||||||
fallback = _fallback_fact_answer(prompt, selected_pack)
|
fallback = _fallback_fact_answer(prompt, selected_pack)
|
||||||
|
model_context = _merge_context_blocks(selected_pack, extra_context)
|
||||||
if not subjective and fallback:
|
if not subjective and fallback:
|
||||||
if state:
|
if state:
|
||||||
state.update("done", step=_open_ended_total_steps(normalized))
|
state.update("done", step=_open_ended_total_steps(normalized))
|
||||||
return _ensure_scores(fallback)
|
return _ensure_scores(fallback)
|
||||||
if state:
|
if state:
|
||||||
state.update("drafting", step=1, note="synthesizing")
|
state.update("drafting", step=1, note="synthesizing")
|
||||||
reply = _ollama_call(
|
reply = _ollama_call_safe(
|
||||||
("atlasbot_deep", "atlasbot_deep"),
|
("atlasbot_deep", "atlasbot_deep"),
|
||||||
prompt,
|
prompt,
|
||||||
context=_append_history_context(selected_pack, history_lines),
|
context=_append_history_context(model_context, history_lines),
|
||||||
use_history=False,
|
fallback="",
|
||||||
system_override=_open_ended_system(),
|
system_override=_open_ended_system(),
|
||||||
model=model,
|
model=model,
|
||||||
timeout=_mode_ollama_timeout_sec(normalized),
|
timeout=_mode_ollama_timeout_sec(normalized),
|
||||||
@ -4369,6 +4382,7 @@ def open_ended_answer(
|
|||||||
history_lines: list[str],
|
history_lines: list[str],
|
||||||
mode: str,
|
mode: str,
|
||||||
allow_tools: bool,
|
allow_tools: bool,
|
||||||
|
context: str = "",
|
||||||
state: ThoughtState | None = None,
|
state: ThoughtState | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
|
lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
|
||||||
@ -4393,6 +4407,7 @@ def open_ended_answer(
|
|||||||
fact_lines=lines,
|
fact_lines=lines,
|
||||||
fact_meta=fact_meta,
|
fact_meta=fact_meta,
|
||||||
history_lines=history_lines,
|
history_lines=history_lines,
|
||||||
|
extra_context=context,
|
||||||
state=state,
|
state=state,
|
||||||
)
|
)
|
||||||
return _open_ended_deep(
|
return _open_ended_deep(
|
||||||
@ -4401,6 +4416,7 @@ def open_ended_answer(
|
|||||||
fact_lines=lines,
|
fact_lines=lines,
|
||||||
fact_meta=fact_meta,
|
fact_meta=fact_meta,
|
||||||
history_lines=history_lines,
|
history_lines=history_lines,
|
||||||
|
extra_context=context,
|
||||||
mode=normalized,
|
mode=normalized,
|
||||||
state=state,
|
state=state,
|
||||||
)
|
)
|
||||||
@ -4512,6 +4528,7 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
|
|||||||
history_lines=history_lines,
|
history_lines=history_lines,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
allow_tools=True,
|
allow_tools=True,
|
||||||
|
context=context,
|
||||||
state=None,
|
state=None,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@ -4880,16 +4897,18 @@ def ollama_reply_with_thinking(
|
|||||||
done = threading.Event()
|
done = threading.Event()
|
||||||
|
|
||||||
def worker():
|
def worker():
|
||||||
result["reply"] = ollama_reply(
|
try:
|
||||||
hist_key,
|
result["reply"] = ollama_reply(
|
||||||
prompt,
|
hist_key,
|
||||||
context=context,
|
prompt,
|
||||||
fallback=fallback,
|
context=context,
|
||||||
use_history=use_history,
|
fallback=fallback,
|
||||||
model=model,
|
use_history=use_history,
|
||||||
timeout=timeout,
|
model=model,
|
||||||
)
|
timeout=timeout,
|
||||||
done.set()
|
)
|
||||||
|
finally:
|
||||||
|
done.set()
|
||||||
|
|
||||||
thread = threading.Thread(target=worker, daemon=True)
|
thread = threading.Thread(target=worker, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
@ -4921,6 +4940,7 @@ def open_ended_with_thinking(
|
|||||||
history_lines: list[str],
|
history_lines: list[str],
|
||||||
mode: str,
|
mode: str,
|
||||||
allow_tools: bool,
|
allow_tools: bool,
|
||||||
|
context: str = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
result: dict[str, str] = {"reply": ""}
|
result: dict[str, str] = {"reply": ""}
|
||||||
done = threading.Event()
|
done = threading.Event()
|
||||||
@ -4928,17 +4948,20 @@ def open_ended_with_thinking(
|
|||||||
state = ThoughtState(total_steps=total_steps)
|
state = ThoughtState(total_steps=total_steps)
|
||||||
|
|
||||||
def worker():
|
def worker():
|
||||||
result["reply"] = open_ended_answer(
|
try:
|
||||||
prompt,
|
result["reply"] = open_ended_answer(
|
||||||
inventory=inventory,
|
prompt,
|
||||||
snapshot=snapshot,
|
inventory=inventory,
|
||||||
workloads=workloads,
|
snapshot=snapshot,
|
||||||
history_lines=history_lines,
|
workloads=workloads,
|
||||||
mode=mode,
|
history_lines=history_lines,
|
||||||
allow_tools=allow_tools,
|
mode=mode,
|
||||||
state=state,
|
allow_tools=allow_tools,
|
||||||
)
|
context=context,
|
||||||
done.set()
|
state=state,
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
done.set()
|
||||||
|
|
||||||
thread = threading.Thread(target=worker, daemon=True)
|
thread = threading.Thread(target=worker, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
@ -5077,6 +5100,7 @@ def sync_loop(token: str, room_id: str, *, account_user: str, default_mode: str)
|
|||||||
history_lines=history[hist_key],
|
history_lines=history[hist_key],
|
||||||
mode=_normalize_mode(mode),
|
mode=_normalize_mode(mode),
|
||||||
allow_tools=allow_tools,
|
allow_tools=allow_tools,
|
||||||
|
context=context,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
reply = _non_cluster_reply(
|
reply = _non_cluster_reply(
|
||||||
|
|||||||
@ -100,8 +100,65 @@ class AtlasbotModeTests(TestCase):
|
|||||||
history_lines=[],
|
history_lines=[],
|
||||||
mode="genius",
|
mode="genius",
|
||||||
allow_tools=True,
|
allow_tools=True,
|
||||||
|
context='Cluster snapshot (JSON): {"injected":true}',
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertIn("The worker spread stands out", reply)
|
self.assertIn("The worker spread stands out", reply)
|
||||||
self.assertEqual(captured["model"], "genius-model")
|
self.assertEqual(captured["model"], "genius-model")
|
||||||
self.assertLessEqual(float(captured["timeout"]), 180.0)
|
self.assertLessEqual(float(captured["timeout"]), 180.0)
|
||||||
|
self.assertIn('Cluster snapshot (JSON): {"injected":true}', str(captured["context"]))
|
||||||
|
|
||||||
|
def test_mode_timeouts_stay_within_budgets(self):
    """Check that each mode calls its own model with a bounded timeout.

    ``_fact_pack_lines`` and ``_ollama_call`` are patched on ``self.bot``.
    The stub records the ``model`` and ``timeout`` of every call so they can
    be compared with the expected model order and the per-mode ceilings.
    """
    # NOTE(review): indentation was lost in the extracted text; the nesting
    # below (reply assertion inside the loop, budget assertions after the
    # patches are released) is reconstructed and should be confirmed.
    canned_facts = [
        "hottest_cpu: longhorn-system (6.69)",
        "worker_nodes: titan-01, titan-02, titan-03",
    ]
    calls: list[tuple[str, float]] = []

    def fake_ollama_call(hist_key, prompt, *, context, use_history=True, system_override=None, model=None, timeout=None):
        # A missing timeout is recorded as 0.0, which passes the ceiling
        # checks further down.
        model_name = str(model)
        recorded_timeout = float(timeout or 0)
        calls.append((model_name, recorded_timeout))
        return "Atlas has a clear standout because the worker spread is healthy. Confidence: high"

    patch_facts = mock.patch.object(self.bot, "_fact_pack_lines", return_value=canned_facts)
    patch_llm = mock.patch.object(self.bot, "_ollama_call", side_effect=fake_ollama_call)
    with patch_facts, patch_llm:
        for mode in ("fast", "smart", "genius"):
            reply = self.bot.open_ended_answer(
                "what stands out about titan lab?",
                inventory=[],
                snapshot=None,
                workloads=[],
                history_lines=[],
                mode=mode,
                allow_tools=True,
            )
            self.assertIn("Confidence:", reply)

    # Exactly one recorded call per mode, in the order the modes were run.
    observed_models = [model_name for model_name, _ in calls]
    self.assertEqual(observed_models, ["fast-model", "smart-model", "genius-model"])
    # Ceilings for fast, smart and genius respectively.
    for (_, used_timeout), ceiling in zip(calls, (15.0, 45.0, 180.0)):
        self.assertLessEqual(used_timeout, ceiling)
|
||||||
|
|
||||||
|
def test_llm_timeout_still_returns_a_conclusion(self):
    """Check that an answer is still produced when every LLM call times out.

    ``_ollama_call`` is patched on ``self.bot`` to raise ``TimeoutError``.
    The reply from ``open_ended_answer`` in genius mode must still mention
    the worker nodes and contain a ``Confidence:`` marker.
    """
    # NOTE(review): indentation was lost in the extracted text; the nesting
    # below (assertions after the patches are released) is reconstructed
    # and should be confirmed.
    canned_facts = [
        "worker_nodes: titan-01, titan-02, titan-03",
        "hottest_cpu: longhorn-system (6.69)",
    ]
    simulated_failure = TimeoutError("simulated timeout")

    with mock.patch.object(self.bot, "_fact_pack_lines", return_value=canned_facts):
        with mock.patch.object(self.bot, "_ollama_call", side_effect=simulated_failure):
            reply = self.bot.open_ended_answer(
                "what stands out about the worker nodes?",
                inventory=[],
                snapshot=None,
                workloads=[],
                history_lines=[],
                mode="genius",
                allow_tools=True,
            )

    lowered_reply = reply.lower()
    self.assertIn("worker nodes", lowered_reply)
    self.assertIn("Confidence:", reply)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user