atlasbot: overhaul reasoning pipeline

Brad Stein 2026-01-27 23:45:08 -03:00
parent b34f2abefd
commit a10050e4c7
2 changed files with 336 additions and 75 deletions

View File

@@ -16,7 +16,7 @@ spec:
       labels:
         app: atlasbot
       annotations:
-        checksum/atlasbot-configmap: manual-atlasbot-81
+        checksum/atlasbot-configmap: manual-atlasbot-82
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "comms"
         vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
@@ -83,6 +83,10 @@ spec:
           value: http://ollama.ai.svc.cluster.local:11434
         - name: OLLAMA_MODEL
           value: qwen2.5:14b-instruct
+        - name: ATLASBOT_MODEL_FAST
+          value: qwen2.5:14b-instruct
+        - name: ATLASBOT_MODEL_DEEP
+          value: qwen2.5:14b-instruct
        - name: OLLAMA_FALLBACK_MODEL
          value: qwen2.5:14b-instruct-q4_0
        - name: OLLAMA_TIMEOUT_SEC

View File

@@ -17,6 +17,8 @@ ROOM_ALIAS = "#othrys:live.bstein.dev"
 OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
 MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
+MODEL_FAST = os.environ.get("ATLASBOT_MODEL_FAST", "")
+MODEL_DEEP = os.environ.get("ATLASBOT_MODEL_DEEP", "")
 FALLBACK_MODEL = os.environ.get("OLLAMA_FALLBACK_MODEL", "")
 API_KEY = os.environ.get("CHAT_API_KEY", "")
 OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "480"))
@@ -372,6 +374,14 @@ def _detect_mode_from_body(body: str, *, default: str = "deep") -> str:
     return default
 
 
+def _model_for_mode(mode: str) -> str:
+    if mode == "fast" and MODEL_FAST:
+        return MODEL_FAST
+    if mode == "deep" and MODEL_DEEP:
+        return MODEL_DEEP
+    return MODEL
+
+
 # Matrix HTTP helper.
 def req(method: str, path: str, token: str | None = None, body=None, timeout=60, base: str | None = None):
     url = (base or BASE) + path
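Note on resolution order: a per-mode override wins only when it is non-empty; otherwise both modes fall back to the shared OLLAMA_MODEL. A standalone sketch of that behavior (the defaults here are illustrative, not the chart's):

```python
import os

# Illustrative defaults; the deployment currently pins all three env vars
# to the same model, so mode routing is a no-op until they diverge.
MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5:14b-instruct")
MODEL_FAST = os.environ.get("ATLASBOT_MODEL_FAST", "")
MODEL_DEEP = os.environ.get("ATLASBOT_MODEL_DEEP", "")

def model_for_mode(mode: str) -> str:
    # Empty-string overrides are treated as unset.
    if mode == "fast" and MODEL_FAST:
        return MODEL_FAST
    if mode == "deep" and MODEL_DEEP:
        return MODEL_DEEP
    return MODEL

assert model_for_mode("nonsense") == MODEL  # unknown modes use the default
```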
@@ -2487,7 +2497,13 @@ class ThoughtState:
         return f"Still thinking ({detail})."
 
 
-def _ollama_json_call(prompt: str, *, context: str, retries: int = 2) -> dict[str, Any]:
+def _ollama_json_call(
+    prompt: str,
+    *,
+    context: str,
+    retries: int = 2,
+    model: str | None = None,
+) -> dict[str, Any]:
     system = (
         "System: You are Atlas, a reasoning assistant. "
         "Return strict JSON only (no code fences, no trailing commentary). "
@@ -2504,6 +2520,7 @@ def _ollama_json_call(prompt: str, *, context: str, retries: int = 2) -> dict[st
         context=context,
         use_history=False,
         system_override=system,
+        model=model,
     )
     cleaned = _strip_code_fence(raw).strip()
     if cleaned.startswith("{") and cleaned.endswith("}"):
@@ -2547,6 +2564,19 @@ def _fact_pack_text(lines: list[str]) -> str:
     return "Fact pack:\n" + "\n".join(labeled)
 
 
+def _tool_fact_lines(prompt: str, *, allow_tools: bool) -> list[str]:
+    if not allow_tools:
+        return []
+    metrics_context, _ = metrics_query_context(prompt, allow_tools=True)
+    lines: list[str] = []
+    if metrics_context:
+        for line in metrics_context.splitlines():
+            trimmed = line.strip()
+            if trimmed:
+                lines.append(f"tool_metrics: {trimmed}")
+    return lines
+
+
 _ALLOWED_INSIGHT_TAGS = {
     "availability",
     "architecture",
@@ -2607,6 +2637,15 @@ def _history_tags(history_lines: list[str]) -> set[str]:
     return tags & _ALLOWED_INSIGHT_TAGS
 
 
+def _normalize_fraction(value: Any, *, default: float = 0.5) -> float:
+    if isinstance(value, (int, float)):
+        score = float(value)
+        if score > 1:
+            score = score / 100.0
+        return max(0.0, min(1.0, score))
+    return default
+
+
 def _seed_insights(
     lines: list[str],
     fact_meta: dict[str, dict[str, Any]],
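One quirk worth knowing: any numeric value above 1 is treated as a percentage, so 1.5 rescales to 0.015 rather than clamping to 1.0. Quick checks against a self-contained copy of the helper:

```python
from typing import Any

def normalize_fraction(value: Any, *, default: float = 0.5) -> float:
    if isinstance(value, (int, float)):
        score = float(value)
        if score > 1:           # values on a 0-100 scale get rescaled
            score = score / 100.0
        return max(0.0, min(1.0, score))
    return default              # strings, None, lists -> default

assert normalize_fraction(0.8) == 0.8    # already a fraction
assert normalize_fraction(80) == 0.8     # percentage input
assert normalize_fraction(-2) == 0.0     # clamped at the floor
assert normalize_fraction(1.5) == 0.015  # the >1 heuristic, not a clamp
assert normalize_fraction("n/a") == 0.5  # non-numeric falls back
```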
@@ -2735,9 +2774,9 @@ def _open_ended_system() -> str:
         "Use ONLY the provided fact pack and recent chat as your evidence. "
         "You may draw light inferences if you label them as such. "
         "Write concise, human sentences with a helpful, calm tone (not a list). "
-        "If the question is subjective, share a light opinion grounded in facts. "
+        "If the question is subjective, share a light opinion grounded in facts and explain why it stands out. "
         "If the question is ambiguous, pick a reasonable interpretation and state it briefly. "
-        "Avoid repeating the exact same observation as the last response if possible. "
+        "Avoid repeating the exact same observation as the last response if possible; vary across metrics, workload, or hardware details. "
         "Do not invent numbers or facts. "
         "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), HallucinationRisk (low|medium|high)."
     )
@@ -2750,6 +2789,7 @@ def _ollama_call_safe(
     context: str,
     fallback: str,
     system_override: str | None = None,
+    model: str | None = None,
 ) -> str:
     try:
         return _ollama_call(
@@ -2758,6 +2798,7 @@ def _ollama_call_safe(
             context=context,
             use_history=False,
             system_override=system_override,
+            model=model,
         )
     except Exception:
         return fallback
@@ -2841,6 +2882,7 @@ def _open_ended_plan(
     history_lines: list[str],
     count: int,
     state: ThoughtState | None,
+    model: str | None,
 ) -> list[dict[str, Any]]:
     if state:
         state.update("planning", step=1, note="mapping angles")
@@ -2850,10 +2892,15 @@
         f"{count} distinct answer angles that can be supported by the fact pack. "
         "Keep them diverse (e.g., metrics, hardware, workload placement, recent changes). "
         "If the question is subjective, propose at least one angle that surfaces a standout detail. "
+        "Avoid repeating the same angle as the most recent response if possible. "
         "Return JSON: {\"angles\":[{\"focus\":\"...\",\"reason\":\"...\",\"priority\":1-5}]}."
     )
     context = _append_history_context(fact_pack, history_lines)
-    result = _ollama_json_call(prompt_text + f" Question: {prompt}", context=context)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
     angles = result.get("angles") if isinstance(result, dict) else None
     cleaned: list[dict[str, Any]] = []
     seen: set[str] = set()
@@ -2883,6 +2930,81 @@
     return cleaned
 
 
+def _preferred_tags_for_prompt(prompt: str) -> set[str]:
+    q = normalize_query(prompt)
+    tags: set[str] = set()
+    if any(word in q for word in ("cpu", "ram", "memory", "net", "network", "io", "disk", "hottest", "busy", "usage", "utilization", "load")):
+        tags.add("utilization")
+    if any(word in q for word in ("postgres", "database", "db", "connections")):
+        tags.add("database")
+    if any(word in q for word in ("pod", "pods", "deployment", "job", "cronjob")):
+        tags.add("pods")
+    if any(word in q for word in ("workload", "service", "namespace")):
+        tags.add("workloads")
+    if any(word in q for word in ("ready", "not ready", "down", "unreachable", "availability")):
+        tags.add("availability")
+    if any(word in q for word in ("node", "nodes", "hardware", "arch", "architecture", "rpi", "jetson", "amd64", "arm64", "worker", "control-plane")):
+        tags.update({"hardware", "inventory", "architecture"})
+    return tags & _ALLOWED_INSIGHT_TAGS
+
+
+def _open_ended_insights(
+    prompt: str,
+    *,
+    fact_pack: str,
+    fact_meta: dict[str, dict[str, Any]],
+    history_lines: list[str],
+    count: int,
+    state: ThoughtState | None,
+    model: str | None,
+) -> list[dict[str, Any]]:
+    if state:
+        state.update("analyzing", note="scouting insights")
+    count = max(1, count)
+    allowed_tags = ", ".join(sorted(_ALLOWED_INSIGHT_TAGS))
+    prompt_text = (
+        "Review the fact pack and propose up to "
+        f"{count} insights that could answer the question. "
+        "Each insight should be grounded in the facts. "
+        "Return JSON: {\"insights\":[{\"summary\":\"...\",\"fact_ids\":[\"F1\"],"
+        "\"relevance\":0-1,\"novelty\":0-1,\"tags\":[\"tag\"],\"rationale\":\"...\"}]}. "
+        f"Only use tags from: {allowed_tags}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
+    insights = result.get("insights") if isinstance(result, dict) else None
+    cleaned: list[dict[str, Any]] = []
+    valid_ids = set(fact_meta.keys())
+    if isinstance(insights, list):
+        for item in insights:
+            if not isinstance(item, dict):
+                continue
+            summary = str(item.get("summary") or item.get("claim") or "").strip()
+            if not summary:
+                continue
+            raw_ids = item.get("fact_ids") if isinstance(item.get("fact_ids"), list) else []
+            fact_ids = [fid for fid in raw_ids if isinstance(fid, str) and fid in valid_ids]
+            if not fact_ids:
+                continue
+            cleaned.append(
+                {
+                    "summary": summary,
+                    "fact_ids": fact_ids,
+                    "relevance": _normalize_fraction(item.get("relevance"), default=0.6),
+                    "novelty": _normalize_fraction(item.get("novelty"), default=0.5),
+                    "rationale": str(item.get("rationale") or ""),
+                    "tags": [t for t in (item.get("tags") or []) if isinstance(t, str)],
+                }
+            )
+    if cleaned and state:
+        state.update("analyzing", note=_candidate_note(cleaned[0]))
+    return cleaned
+
+
 def _normalize_score(value: Any, *, default: int = 60) -> int:
     if isinstance(value, (int, float)):
         return int(max(0, min(100, value)))
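The tag routing is a plain substring scan over the normalized query, so short tokens like "db" can also match inside longer words. Since normalize_query and the full tag set live elsewhere in the file, this trimmed-down runnable version stubs both (abridged word lists):

```python
ALLOWED_INSIGHT_TAGS = {
    "availability", "architecture", "database", "hardware",
    "inventory", "pods", "utilization", "workloads",
}

def normalize_query(prompt: str) -> str:
    return prompt.lower()  # stand-in for the real normalizer

def preferred_tags_for_prompt(prompt: str) -> set[str]:
    q = normalize_query(prompt)
    tags: set[str] = set()
    if any(word in q for word in ("cpu", "ram", "memory", "load", "hottest")):
        tags.add("utilization")
    if any(word in q for word in ("node", "arm64", "rpi", "jetson")):
        tags.update({"hardware", "inventory", "architecture"})
    return tags & ALLOWED_INSIGHT_TAGS

print(preferred_tags_for_prompt("which arm64 node runs hottest?"))
# {'utilization', 'hardware', 'inventory', 'architecture'} (set order varies)
```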
@@ -2915,20 +3037,31 @@ def _open_ended_candidate(
     history_lines: list[str],
     state: ThoughtState | None,
     step: int,
+    fact_hints: list[str] | None = None,
+    model: str | None = None,
 ) -> dict[str, Any]:
     if state:
         state.update("drafting", step=step, note=focus)
+    hint_text = ""
+    if fact_hints:
+        hint_text = " Prioritize these fact IDs if relevant: " + ", ".join(fact_hints) + "."
     prompt_text = (
         "Using ONLY the fact pack, answer the question focusing on this angle: "
         f"{focus}. "
-        "Write 2-4 sentences in plain prose (not a list). "
+        "Write 2-4 sentences in plain prose (not a list)."
+        + hint_text
+        + " "
         "If you infer, label it as inference. "
         "List which fact pack IDs you used. "
         "Return JSON: {\"answer\":\"...\",\"facts_used\":[\"F1\"],\"confidence\":\"high|medium|low\","
         "\"relevance\":0-100,\"satisfaction\":0-100,\"risk\":\"low|medium|high\"}."
     )
     context = _append_history_context(fact_pack, history_lines)
-    result = _ollama_json_call(prompt_text + f" Question: {prompt}", context=context)
+    result = _ollama_json_call(
+        prompt_text + f" Question: {prompt}",
+        context=context,
+        model=model,
+    )
     if not isinstance(result, dict):
         result = {}
     answer = str(result.get("answer") or "").strip()
@@ -2986,9 +3119,12 @@ def _open_ended_synthesize(
     candidates: list[dict[str, Any]],
     state: ThoughtState | None,
     step: int,
+    model: str | None,
+    critique: str | None = None,
 ) -> str:
     if state:
         state.update("synthesizing", step=step, note="composing answer")
+    critique_block = f"\nCritique guidance: {critique}\n" if critique else "\n"
     synth_prompt = (
         "Compose the final answer to the question using the candidate answers below. "
         "Select the best 1-2 candidates, blend them if helpful, and keep 2-4 sentences. "
@@ -3001,6 +3137,7 @@
         "End with lines: Confidence, Relevance (0-100), Satisfaction (0-100), "
         "HallucinationRisk (low|medium|high).\n"
         f"Question: {prompt}\n"
+        f"{critique_block}"
         f"Candidates: {json.dumps(candidates, ensure_ascii=False)}"
     )
     context = _append_history_context(fact_pack, history_lines)
@@ -3010,20 +3147,55 @@
         context=context,
         fallback="I don't have enough data to answer that.",
         system_override=_open_ended_system(),
+        model=model,
     )
     return _ensure_scores(reply)
 
 
+def _open_ended_critique(
+    prompt: str,
+    *,
+    fact_pack: str,
+    history_lines: list[str],
+    candidates: list[dict[str, Any]],
+    state: ThoughtState | None,
+    step: int,
+    model: str | None,
+) -> str:
+    if state:
+        state.update("reviewing", step=step, note="quality check")
+    critique_prompt = (
+        "Review the candidate answers against the fact pack. "
+        "Identify any missing important detail or risky inference and give one sentence of guidance. "
+        "Return JSON: {\"guidance\":\"...\",\"risk\":\"low|medium|high\"}."
+    )
+    context = _append_history_context(fact_pack, history_lines)
+    result = _ollama_json_call(
+        critique_prompt + f" Question: {prompt} Candidates: {json.dumps(candidates, ensure_ascii=False)}",
+        context=context,
+        model=model,
+    )
+    if isinstance(result, dict):
+        guidance = str(result.get("guidance") or "").strip()
+        if guidance:
+            return guidance
+    return ""
+
+
 def _open_ended_multi(
     prompt: str,
     *,
     fact_pack: str,
+    fact_lines: list[str],
+    fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     mode: str,
     state: ThoughtState | None = None,
 ) -> str:
+    model = _model_for_mode(mode)
     angle_count = 2 if mode == "fast" else 4
-    total_steps = 1 + angle_count + 2
+    insight_count = 2 if mode == "fast" else 4
+    total_steps = 2 + angle_count + 2 + (1 if mode == "deep" else 0)
     if state:
         state.total_steps = total_steps
     angles = _open_ended_plan(
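The critique pass is fail-open by design: a malformed JSON reply, a non-dict result, or empty guidance all collapse to an empty string and synthesis proceeds unguided. A sketch of just that extraction (helper name is mine; the diff inlines it):

```python
from typing import Any

def extract_guidance(result: Any) -> str:
    # Mirrors the defensive tail of _open_ended_critique.
    if isinstance(result, dict):
        guidance = str(result.get("guidance") or "").strip()
        if guidance:
            return guidance
    return ""

assert extract_guidance({"guidance": " note the flagged node "}) == "note the flagged node"
assert extract_guidance({"guidance": ""}) == ""      # empty -> no guidance
assert extract_guidance(["not", "a", "dict"]) == ""  # wrong shape -> no guidance
```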
@@ -3032,10 +3204,57 @@
         history_lines=history_lines,
         count=angle_count,
         state=state,
+        model=model,
     )
+    insights = _open_ended_insights(
+        prompt,
+        fact_pack=fact_pack,
+        fact_meta=fact_meta,
+        history_lines=history_lines,
+        count=insight_count,
+        state=state,
+        model=model,
+    )
+    seeds = _seed_insights(fact_lines, fact_meta, limit=max(4, insight_count))
+    insight_candidates = insights + seeds
+    subjective = _is_subjective_query(prompt)
+    prefer_tags = _preferred_tags_for_prompt(prompt)
+    history_tags = _history_tags(history_lines)
+    avoid_tags = history_tags if subjective else set()
+    preference = "novelty" if subjective else "relevance"
+    selected_insights = _select_diverse_insights(
+        insight_candidates,
+        preference=preference,
+        prefer_tags=prefer_tags,
+        avoid_tags=avoid_tags,
+        history_tags=history_tags,
+        fact_meta=fact_meta,
+        count=1 if mode == "fast" else 2,
+    )
+    if state and selected_insights:
+        state.update("analyzing", note=_candidate_note(selected_insights[0]))
+    angle_inputs: list[dict[str, Any]] = []
+    for insight in selected_insights:
+        angle_inputs.append(
+            {
+                "focus": str(insight.get("summary") or "Direct answer"),
+                "fact_ids": insight.get("fact_ids") or [],
+            }
+        )
+    for angle in angles:
+        if len(angle_inputs) >= angle_count:
+            break
+        angle_inputs.append(
+            {
+                "focus": str(angle.get("focus") or "Direct answer"),
+                "fact_ids": [],
+            }
+        )
     candidates: list[dict[str, Any]] = []
-    step = 2
-    for angle in angles[:angle_count]:
+    step = 3
+    for angle in angle_inputs[:angle_count]:
         candidates.append(
             _open_ended_candidate(
                 prompt,
@@ -3044,6 +3263,8 @@
                 history_lines=history_lines,
                 state=state,
                 step=step,
+                fact_hints=angle.get("fact_ids") if isinstance(angle.get("fact_ids"), list) else None,
+                model=model,
             )
         )
         step += 1
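Drafting inputs are now a merge: selected insights come first (carrying their fact IDs as hints for the drafting prompt), and planner angles only fill the remaining slots up to angle_count. The merge, factored into a standalone helper for illustration (the diff inlines it):

```python
from typing import Any

def merge_angle_inputs(
    selected_insights: list[dict[str, Any]],
    angles: list[dict[str, Any]],
    angle_count: int,
) -> list[dict[str, Any]]:
    merged = [
        {"focus": str(ins.get("summary") or "Direct answer"),
         "fact_ids": ins.get("fact_ids") or []}
        for ins in selected_insights
    ]
    for angle in angles:
        if len(merged) >= angle_count:
            break  # insights can crowd planner angles out entirely
        merged.append({"focus": str(angle.get("focus") or "Direct answer"),
                       "fact_ids": []})
    return merged[:angle_count]  # the drafting loop slices the same way

print(merge_angle_inputs(
    [{"summary": "titan-03 runs hottest", "fact_ids": ["F2"]}],
    [{"focus": "recent deployments"}, {"focus": "storage pressure"}],
    angle_count=2,
))
# [{'focus': 'titan-03 runs hottest', 'fact_ids': ['F2']},
#  {'focus': 'recent deployments', 'fact_ids': []}]
```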
@@ -3051,6 +3272,18 @@
         state.update("evaluating", step=step, note="ranking candidates")
     selected = _select_candidates(candidates, count=1 if mode == "fast" else 2)
     step += 1
+    critique = ""
+    if mode == "deep":
+        critique = _open_ended_critique(
+            prompt,
+            fact_pack=fact_pack,
+            history_lines=history_lines,
+            candidates=selected or candidates,
+            state=state,
+            step=step,
+            model=model,
+        )
+        step += 1
     reply = _open_ended_synthesize(
         prompt,
         fact_pack=fact_pack,
@@ -3058,6 +3291,8 @@
         candidates=selected or candidates,
         state=state,
         step=step,
+        model=model,
+        critique=critique,
     )
     if state:
         state.update("done", step=total_steps)
@@ -3066,19 +3301,23 @@
 def _open_ended_total_steps(mode: str) -> int:
     angle_count = 2 if mode == "fast" else 4
-    return 1 + angle_count + 2
+    return 2 + angle_count + 2 + (1 if mode == "deep" else 0)
 
 
 def _open_ended_fast(
     prompt: str,
     *,
     fact_pack: str,
+    fact_lines: list[str],
+    fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     state: ThoughtState | None = None,
 ) -> str:
     return _open_ended_multi(
         prompt,
         fact_pack=fact_pack,
+        fact_lines=fact_lines,
+        fact_meta=fact_meta,
         history_lines=history_lines,
         mode="fast",
         state=state,
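The new budget reads as: planning and insight scouting (2), one draft per angle, evaluation and synthesis (2), plus the deep-only critique. A quick sanity check of the arithmetic:

```python
def open_ended_total_steps(mode: str) -> int:
    angle_count = 2 if mode == "fast" else 4
    return 2 + angle_count + 2 + (1 if mode == "deep" else 0)

# fast: plan(1) + insights(1) + drafts(2) + evaluate(1) + synthesize(1)  = 6
# deep: plan(1) + insights(1) + drafts(4) + evaluate(1) + critique(1)
#       + synthesize(1)                                                  = 9
assert open_ended_total_steps("fast") == 6
assert open_ended_total_steps("deep") == 9
```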
@@ -3089,12 +3328,16 @@ def _open_ended_deep(
     prompt: str,
     *,
     fact_pack: str,
+    fact_lines: list[str],
+    fact_meta: dict[str, dict[str, Any]],
     history_lines: list[str],
     state: ThoughtState | None = None,
 ) -> str:
     return _open_ended_multi(
         prompt,
         fact_pack=fact_pack,
+        fact_lines=fact_lines,
+        fact_meta=fact_meta,
         history_lines=history_lines,
         mode="deep",
         state=state,
@@ -3109,31 +3352,61 @@
     workloads: list[dict[str, Any]],
     history_lines: list[str],
     mode: str,
+    allow_tools: bool,
     state: ThoughtState | None = None,
 ) -> str:
     lines = _fact_pack_lines(prompt, inventory=inventory, snapshot=snapshot, workloads=workloads)
+    if _knowledge_intent(prompt) or _doc_intent(prompt):
+        kb_detail = kb_retrieve(prompt)
+        if kb_detail:
+            for line in kb_detail.splitlines():
+                if line.strip():
+                    lines.append(line.strip())
+    tool_lines = _tool_fact_lines(prompt, allow_tools=allow_tools)
+    if tool_lines:
+        lines.extend(tool_lines)
     if not lines:
         return _ensure_scores("I don't have enough data to answer that.")
     fact_pack = _fact_pack_text(lines)
+    fact_meta = _fact_pack_meta(lines)
     if mode == "fast":
         return _open_ended_fast(
             prompt,
             fact_pack=fact_pack,
+            fact_lines=lines,
+            fact_meta=fact_meta,
             history_lines=history_lines,
             state=state,
         )
     return _open_ended_deep(
         prompt,
         fact_pack=fact_pack,
+        fact_lines=lines,
+        fact_meta=fact_meta,
         history_lines=history_lines,
         state=state,
     )
 
 
-def _non_cluster_reply(prompt: str) -> str:
-    return _ensure_scores(
-        "I focus on the Atlas/Othrys cluster and don't have enough data to answer that."
-    )
+def _non_cluster_reply(prompt: str, *, history_lines: list[str], mode: str) -> str:
+    system = (
+        "System: You are Atlas, a helpful general assistant. "
+        "Answer using common knowledge when possible, and say when you're unsure. "
+        "Be concise and avoid unnecessary caveats. "
+        "Respond in plain sentences (no lists unless asked). "
+        "End every response with a line: 'Confidence: high|medium|low'."
+    )
+    model = _model_for_mode(mode)
+    context = _append_history_context("", history_lines) if history_lines else ""
+    reply = _ollama_call(
+        ("general", "reply"),
+        prompt,
+        context=context,
+        use_history=False,
+        system_override=system,
+        model=model,
+    )
+    return _ensure_scores(reply)
 
 
 # Internal HTTP endpoint for cluster answers (website uses this).
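Behavior change to flag: off-topic prompts previously got a canned refusal; they now go to the model under a general-assistant system prompt and still pass through _ensure_scores. A rough, runnable approximation with both model-facing helpers stubbed (reply text is invented):

```python
def ollama_call_stub(prompt: str, *, system: str, model: str | None) -> str:
    # Stand-in for _ollama_call; a real call would hit the Ollama endpoint.
    return "Kubernetes schedules and manages containerized workloads."

def ensure_scores_stub(reply: str) -> str:
    # The real _ensure_scores appends missing score lines; approximated here.
    if "Confidence:" not in reply:
        reply += "\nConfidence: medium"
    return reply

def non_cluster_reply(prompt: str, *, mode: str) -> str:
    system = (
        "System: You are Atlas, a helpful general assistant. "
        "End every response with a line: 'Confidence: high|medium|low'."
    )
    return ensure_scores_stub(ollama_call_stub(prompt, system=system, model=None))

print(non_cluster_reply("what is kubernetes?", mode="fast"))
```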
@@ -3183,7 +3456,11 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
            return
        cleaned = _strip_bot_mention(prompt)
        mode = str(payload.get("mode") or "deep").lower()
-        if mode not in ("fast", "deep"):
+        if mode in ("quick", "fast"):
+            mode = "fast"
+        elif mode in ("smart", "deep"):
+            mode = "deep"
+        else:
            mode = "deep"
        snapshot = _snapshot_state()
        inventory = _snapshot_inventory(snapshot) or node_inventory_live()
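The handler now accepts the UI's aliases ("quick", "smart") alongside the canonical names; anything else still degrades to deep. Factored into a helper for the demo (the diff inlines it):

```python
def normalize_mode(raw: object) -> str:
    mode = str(raw or "deep").lower()
    if mode in ("quick", "fast"):
        return "fast"
    if mode in ("smart", "deep"):
        return "deep"
    return "deep"  # unknown values keep the old default

assert normalize_mode("QUICK") == "fast"
assert normalize_mode("smart") == "deep"
assert normalize_mode(None) == "deep"
assert normalize_mode("banana") == "deep"
```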
@@ -3212,37 +3489,19 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
                snapshot=snapshot,
                workloads=workloads,
            )
-            fallback = "I don't have enough data to answer that."
            if cluster_query:
-                open_ended = (
-                    _is_subjective_query(cleaned)
-                    or _knowledge_intent(cleaned)
-                    or _is_overview_query(cleaned)
-                    or _doc_intent(cleaned)
-                )
-                if open_ended:
-                    answer = open_ended_answer(
-                        cleaned,
-                        inventory=inventory,
-                        snapshot=snapshot,
-                        workloads=workloads,
-                        history_lines=history_lines,
-                        mode=mode,
-                        state=None,
-                    )
-                else:
-                    answer = (
-                        cluster_answer(
-                            cleaned,
-                            inventory=inventory,
-                            snapshot=snapshot,
-                            workloads=workloads,
-                            history_lines=history_lines,
-                        )
-                        or fallback
-                    )
+                answer = open_ended_answer(
+                    cleaned,
+                    inventory=inventory,
+                    snapshot=snapshot,
+                    workloads=workloads,
+                    history_lines=history_lines,
+                    mode=mode,
+                    allow_tools=False,
+                    state=None,
+                )
            else:
-                answer = _non_cluster_reply(cleaned)
+                answer = _non_cluster_reply(cleaned, history_lines=history_lines, mode=mode)
            self._write_json(200, {"answer": answer})
@@ -3490,6 +3749,7 @@ def _ollama_call(
     context: str,
     use_history: bool = True,
     system_override: str | None = None,
+    model: str | None = None,
 ) -> str:
     system = system_override or (
         "System: You are Atlas, the Titan lab assistant for Atlas/Othrys. "
@@ -3521,7 +3781,8 @@
         messages.extend(_history_to_messages(history[hist_key][-24:]))
     messages.append({"role": "user", "content": prompt})
-    payload = {"model": MODEL, "messages": messages, "stream": False}
+    model_name = model or MODEL
+    payload = {"model": model_name, "messages": messages, "stream": False}
     headers = {"Content-Type": "application/json"}
     if API_KEY:
         headers["x-api-key"] = API_KEY
@@ -3561,11 +3822,18 @@ def ollama_reply(
     context: str,
     fallback: str = "",
     use_history: bool = True,
+    model: str | None = None,
 ) -> str:
     last_error = None
     for attempt in range(max(1, OLLAMA_RETRIES + 1)):
         try:
-            return _ollama_call(hist_key, prompt, context=context, use_history=use_history)
+            return _ollama_call(
+                hist_key,
+                prompt,
+                context=context,
+                use_history=use_history,
+                model=model,
+            )
         except Exception as exc:  # noqa: BLE001
             last_error = exc
             time.sleep(min(4, 2 ** attempt))
@@ -3584,6 +3852,7 @@ def ollama_reply_with_thinking(
     context: str,
     fallback: str,
     use_history: bool = True,
+    model: str | None = None,
 ) -> str:
     result: dict[str, str] = {"reply": ""}
     done = threading.Event()
@@ -3595,6 +3864,7 @@
             context=context,
             fallback=fallback,
             use_history=use_history,
+            model=model,
         )
         done.set()
@@ -3627,6 +3897,7 @@ def open_ended_with_thinking(
     workloads: list[dict[str, Any]],
     history_lines: list[str],
     mode: str,
+    allow_tools: bool,
 ) -> str:
     result: dict[str, str] = {"reply": ""}
     done = threading.Event()
@@ -3641,6 +3912,7 @@
             workloads=workloads,
             history_lines=history_lines,
             mode=mode,
+            allow_tools=allow_tools,
             state=state,
         )
         done.set()
@@ -3766,39 +4038,24 @@ def sync_loop(token: str, room_id: str):
                extra = "VictoriaMetrics (PromQL result):\n" + rendered
                send_msg(token, rid, extra)
                continue
-            fallback = "I don't have enough data to answer that."
            if cluster_query:
-                open_ended = (
-                    _is_subjective_query(cleaned_body)
-                    or _knowledge_intent(cleaned_body)
-                    or _is_overview_query(cleaned_body)
-                    or _doc_intent(cleaned_body)
-                )
-                if open_ended:
-                    reply = open_ended_with_thinking(
-                        token,
-                        rid,
-                        cleaned_body,
-                        inventory=inventory,
-                        snapshot=snapshot,
-                        workloads=workloads,
-                        history_lines=history[hist_key],
-                        mode=mode if mode in ("fast", "deep") else "deep",
-                    )
-                else:
-                    reply = (
-                        cluster_answer(
-                            cleaned_body,
-                            inventory=inventory,
-                            snapshot=snapshot,
-                            workloads=workloads,
-                            history_lines=history[hist_key],
-                        )
-                        or fallback
-                    )
+                reply = open_ended_with_thinking(
+                    token,
+                    rid,
+                    cleaned_body,
+                    inventory=inventory,
+                    snapshot=snapshot,
+                    workloads=workloads,
+                    history_lines=history[hist_key],
+                    mode=mode if mode in ("fast", "deep") else "deep",
+                    allow_tools=allow_tools,
+                )
            else:
-                reply = _non_cluster_reply(cleaned_body)
+                reply = _non_cluster_reply(
+                    cleaned_body,
+                    history_lines=history[hist_key],
+                    mode=mode if mode in ("fast", "deep") else "deep",
+                )
            send_msg(token, rid, reply)
            history[hist_key].append(f"Atlas: {reply}")
            history[hist_key] = history[hist_key][-80:]