atlasbot: add knowledge summaries and better fallback

This commit is contained in:
Brad Stein 2026-01-27 04:51:20 -03:00
parent 0ef14c67fd
commit c219019ad5

View File

@ -254,14 +254,14 @@ def load_kb():
_NAME_INDEX = names
_METRIC_INDEX = metrics if isinstance(metrics, list) else []
def kb_retrieve(query: str, *, limit: int = 3) -> str:
def _score_kb_docs(query: str) -> list[dict[str, Any]]:
q = (query or "").strip()
if not q or not KB.get("runbooks"):
return ""
return []
ql = q.lower()
q_tokens = _tokens(q)
if not q_tokens:
return ""
return []
scored: list[tuple[int, dict]] = []
for doc in KB.get("runbooks", []):
@ -281,9 +281,16 @@ def kb_retrieve(query: str, *, limit: int = 3) -> str:
score += 4
if score:
scored.append((score, doc))
scored.sort(key=lambda x: x[0], reverse=True)
picked = [d for _, d in scored[:limit]]
return [d for _, d in scored]
def kb_retrieve(query: str, *, limit: int = 3) -> str:
q = (query or "").strip()
if not q:
return ""
scored = _score_kb_docs(q)
picked = scored[:limit]
if not picked:
return ""
@ -301,6 +308,22 @@ def kb_retrieve(query: str, *, limit: int = 3) -> str:
used += len(chunk)
return "\n".join(parts).strip()
def kb_retrieve_titles(query: str, *, limit: int = 4) -> str:
    """Return a short bulleted list of runbooks that match *query*.

    Takes the first ``limit`` documents from ``_score_kb_docs(query)``, in
    the order that helper returns them, and renders one bullet per document
    under a ``Relevant runbooks:`` heading.

    Args:
        query: Free-text user query, passed through to ``_score_kb_docs``.
        limit: Maximum number of runbooks to list.

    Returns:
        The heading plus one ``- title (path)`` line per runbook, or an
        empty string when no document matched.
    """
    top_docs = _score_kb_docs(query)[:limit]
    if not top_docs:
        return ""

    def _bullet(doc: dict) -> str:
        # The path doubles as the label when the title is missing; the
        # parenthesised suffix is only added when a path exists at all.
        location = doc.get("path") or ""
        label = doc.get("title") or location or "runbook"
        return f"- {label} ({location})" if location else f"- {label}"

    return "\n".join(["Relevant runbooks:", *map(_bullet, top_docs)])
def _extract_titan_nodes(text: str) -> list[str]:
cleaned = normalize_query(text)
names = {n.lower() for n in TITAN_NODE_RE.findall(cleaned) if n}
@ -439,6 +462,18 @@ def _format_metric_label(metric: dict[str, Any]) -> str:
return ", ".join(label_parts) if label_parts else "series"
def _primary_series_metric(res: dict | None) -> tuple[str | None, str | None]:
    """Extract the ``node`` label and sample value of the first series in *res*.

    Args:
        res: Query result as accepted by ``_vm_value_series``; ``None`` is
            treated like an empty dict.

    Returns:
        ``(node, value)`` taken from the first series. Either element is
        ``None`` when the corresponding piece is missing or has an
        unexpected shape; ``(None, None)`` when there is no series at all.
    """
    series = _vm_value_series(res or {})
    if not series:
        return (None, None)

    head = series[0]
    if not isinstance(head, dict):
        # A malformed entry carries neither labels nor a sample.
        return (None, None)

    labels = head.get("metric")
    sample = head.get("value")

    node = None
    if isinstance(labels, dict):
        node = labels.get("node")

    reading = None
    # presumably a [timestamp, value] pair; only index 1 is used here
    if isinstance(sample, list) and len(sample) > 1:
        reading = sample[1]

    return (node, reading)
def _format_metric_answer(entry: dict[str, Any], res: dict | None) -> str:
series = _vm_value_series(res)
panel = entry.get("panel_title") or "Metric"
@ -677,7 +712,15 @@ def structured_answer(prompt: str, *, inventory: list[dict[str, Any]], metrics_s
scope_parts.append("worker")
if scope_parts:
scope = " ".join(scope_parts)
return f"Among {scope} nodes, {answer}"
overall_note = ""
base_res = vm_query(entry["exprs"][0], timeout=20)
base_node, base_val = _primary_series_metric(base_res)
scoped_node, scoped_val = _primary_series_metric(res)
if base_node and scoped_node and base_node != scoped_node:
percent = _metric_expr_uses_percent(entry)
base_val_fmt = _format_metric_value(base_val or "", percent=percent)
overall_note = f" Overall hottest node: {base_node} ({base_val_fmt})."
return f"Among {scope} nodes, {answer}{overall_note}"
return answer
if metrics_summary:
return metrics_summary
@ -1075,7 +1118,7 @@ def _context_fallback(context: str) -> str:
trimmed = context.strip()
if len(trimmed) > MAX_TOOL_CHARS:
trimmed = trimmed[: MAX_TOOL_CHARS - 3].rstrip() + "..."
return "I couldnt reach the model backend. Here is the data I found:\n" + trimmed
return "Here is what I found:\n" + trimmed
def vm_top_restarts(hours: int = 1) -> str:
q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))"
@ -1192,6 +1235,11 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
return
inventory = node_inventory_live()
answer = structured_answer(prompt, inventory=inventory, metrics_summary="")
if not answer and _knowledge_intent(prompt):
answer = knowledge_summary(prompt, inventory)
if not answer:
kb = kb_retrieve_titles(prompt, limit=4)
answer = kb or ""
self._write_json(200, {"answer": answer})
@ -1257,6 +1305,48 @@ def build_context(
return "\n\n".join([p for p in parts if p]).strip()
def _knowledge_intent(prompt: str) -> bool:
    """Report whether *prompt* contains one of the "give me an overview" phrases.

    The prompt is passed through ``normalize_query`` and then checked, by
    plain substring match, against a fixed list of trigger phrases.

    Args:
        prompt: Raw user message.

    Returns:
        ``True`` if any trigger phrase occurs in the normalized prompt.
    """
    trigger_phrases = (
        "what do you know",
        "tell me about",
        "overview",
        "summary",
        "describe",
        "explain",
        "what is",
    )
    normalized = normalize_query(prompt)
    for phrase in trigger_phrases:
        if phrase in normalized:
            return True
    return False
def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
    """Render a plain-text overview of the cluster node inventory.

    The first line gives the total node count with ready / not-ready
    tallies. Each non-empty group returned by ``_group_nodes`` then gets its
    own bullet, in a fixed key order.

    Args:
        inventory: Node records; each is read for its ``"ready"`` flag.

    Returns:
        The multi-line summary, or an empty string for an empty inventory.
    """
    if not inventory:
        return ""

    groups = _group_nodes(inventory)

    # Only strict True / False are tallied; a node whose flag is anything
    # else (e.g. missing) still counts toward the total but toward neither.
    ready_count = sum(1 for node in inventory if node.get("ready") is True)
    not_ready_count = sum(1 for node in inventory if node.get("ready") is False)

    lines = [
        f"Atlas cluster: {len(inventory)} nodes "
        f"({ready_count} ready, {not_ready_count} not ready)."
    ]
    for hardware in ("rpi5", "rpi4", "jetson", "amd64", "arm64-unknown", "unknown"):
        members = groups.get(hardware)
        if not members:
            continue
        lines.append(f"- {hardware}: {len(members)} nodes ({', '.join(members)})")
    return "\n".join(lines)
def knowledge_summary(prompt: str, inventory: list[dict[str, Any]]) -> str:
    """Build a model-free answer from the node inventory and matching runbooks.

    Args:
        prompt: User message, used to look up relevant runbook titles.
        inventory: Node records passed to ``_inventory_summary``.

    Returns:
        The inventory summary followed by up to four runbook titles, joined
        by newlines; empty sections are dropped. Empty string when neither
        section produced any text.
    """
    sections = (
        _inventory_summary(inventory),
        kb_retrieve_titles(prompt, limit=4),
    )
    return "\n".join(section for section in sections if section).strip()
def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
system = (
"System: You are Atlas, the Titan lab assistant for Atlas/Othrys. "
@ -1416,6 +1506,12 @@ def sync_loop(token: str, room_id: str):
send_msg(token, rid, structured)
continue
if _knowledge_intent(body):
summary = knowledge_summary(body, inventory)
if summary:
send_msg(token, rid, summary)
continue
reply = ollama_reply_with_thinking(
token,
rid,