atlasbot: add knowledge summaries and better fallback
parent 0ef14c67fd
commit c219019ad5
@@ -254,14 +254,14 @@ def load_kb():
     _NAME_INDEX = names
     _METRIC_INDEX = metrics if isinstance(metrics, list) else []
 
-def kb_retrieve(query: str, *, limit: int = 3) -> str:
+def _score_kb_docs(query: str) -> list[dict[str, Any]]:
     q = (query or "").strip()
     if not q or not KB.get("runbooks"):
-        return ""
+        return []
     ql = q.lower()
     q_tokens = _tokens(q)
     if not q_tokens:
-        return ""
+        return []
 
     scored: list[tuple[int, dict]] = []
     for doc in KB.get("runbooks", []):
@@ -281,9 +281,16 @@ def kb_retrieve(query: str, *, limit: int = 3) -> str:
             score += 4
         if score:
             scored.append((score, doc))
-
     scored.sort(key=lambda x: x[0], reverse=True)
-    picked = [d for _, d in scored[:limit]]
+    return [d for _, d in scored]
+
+
+def kb_retrieve(query: str, *, limit: int = 3) -> str:
+    q = (query or "").strip()
+    if not q:
+        return ""
+    scored = _score_kb_docs(q)
+    picked = scored[:limit]
     if not picked:
         return ""
 
@@ -301,6 +308,22 @@ def kb_retrieve(query: str, *, limit: int = 3) -> str:
         used += len(chunk)
     return "\n".join(parts).strip()
 
+
+def kb_retrieve_titles(query: str, *, limit: int = 4) -> str:
+    scored = _score_kb_docs(query)
+    picked = scored[:limit]
+    if not picked:
+        return ""
+    parts = ["Relevant runbooks:"]
+    for doc in picked:
+        title = doc.get("title") or doc.get("path") or "runbook"
+        path = doc.get("path") or ""
+        if path:
+            parts.append(f"- {title} ({path})")
+        else:
+            parts.append(f"- {title}")
+    return "\n".join(parts)
+
 def _extract_titan_nodes(text: str) -> list[str]:
     cleaned = normalize_query(text)
     names = {n.lower() for n in TITAN_NODE_RE.findall(cleaned) if n}
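Not part of the commit, just an illustrative sketch: the refactor above splits retrieval into a scoring step that returns the full ranked list (_score_kb_docs) and thin wrappers that slice it (kb_retrieve for context chunks, kb_retrieve_titles for a bullet list of titles). A minimal, self-contained Python sketch of that pattern follows, with a toy KB and a deliberately crude tokenizer standing in for the real KB, _tokens, and scoring weights.

from typing import Any

# Toy knowledge base; titles, paths, and bodies are invented for the example.
TOY_KB: dict[str, list[dict[str, Any]]] = {
    "runbooks": [
        {"title": "Node reboot runbook", "path": "docs/reboot.md",
         "body": "drain the node, reboot it, then uncordon"},
        {"title": "VictoriaMetrics alerting", "path": "docs/vm-alerts.md",
         "body": "query vmalert, inspect firing rules, add silences"},
    ]
}


def _toy_tokens(text: str) -> list[str]:
    # crude whitespace tokenizer; the real _tokens() is assumed to do more
    return [t for t in text.lower().split() if t]


def toy_score_docs(query: str) -> list[dict[str, Any]]:
    # rank every runbook, highest score first (the shape of _score_kb_docs)
    scored: list[tuple[int, dict[str, Any]]] = []
    for doc in TOY_KB["runbooks"]:
        haystack = f"{doc['title']} {doc['body']}".lower()
        score = sum(2 for tok in _toy_tokens(query) if tok in haystack)
        if score:
            scored.append((score, doc))
    scored.sort(key=lambda x: x[0], reverse=True)
    return [d for _, d in scored]


def toy_retrieve_titles(query: str, limit: int = 4) -> str:
    # same output shape as kb_retrieve_titles: "- title (path)" bullets
    picked = toy_score_docs(query)[:limit]
    if not picked:
        return ""
    lines = ["Relevant runbooks:"]
    lines.extend(f"- {d['title']} ({d['path']})" for d in picked)
    return "\n".join(lines)


print(toy_retrieve_titles("how do I reboot a node"))
# Relevant runbooks:
# - Node reboot runbook (docs/reboot.md)
# - VictoriaMetrics alerting (docs/vm-alerts.md)

Returning the whole ranked list from the scorer is what lets both callers pick their own limit without re-scoring.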
@@ -439,6 +462,18 @@ def _format_metric_label(metric: dict[str, Any]) -> str:
     return ", ".join(label_parts) if label_parts else "series"
 
 
+def _primary_series_metric(res: dict | None) -> tuple[str | None, str | None]:
+    series = _vm_value_series(res or {})
+    if not series:
+        return (None, None)
+    first = series[0]
+    metric = first.get("metric") if isinstance(first, dict) else {}
+    value = first.get("value") if isinstance(first, dict) else []
+    node = metric.get("node") if isinstance(metric, dict) else None
+    val = value[1] if isinstance(value, list) and len(value) > 1 else None
+    return (node, val)
+
+
 def _format_metric_answer(entry: dict[str, Any], res: dict | None) -> str:
     series = _vm_value_series(res)
     panel = entry.get("panel_title") or "Metric"
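Not part of the commit, just an illustrative sketch: _primary_series_metric reads the first series out of whatever _vm_value_series() yields, which for a VictoriaMetrics/Prometheus instant query is a list of {"metric": {...labels...}, "value": [timestamp, "value-string"]} entries. Below is a self-contained sketch with a made-up payload and the extraction logic inlined; the real code goes through vm_query and _vm_value_series.

# Shape of a Prometheus-style instant-query response; node names and
# numbers are invented for the example.
sample = {
    "status": "success",
    "data": {
        "resultType": "vector",
        "result": [
            {"metric": {"node": "node-a"}, "value": [1717000000, "87.3"]},
            {"metric": {"node": "node-b"}, "value": [1717000000, "54.1"]},
        ],
    },
}


def primary_series(res: dict | None) -> tuple[str | None, str | None]:
    # mirrors _primary_series_metric: the first series wins; value[1] is the
    # string-encoded sample value, value[0] the timestamp
    series = ((res or {}).get("data") or {}).get("result") or []
    if not series:
        return (None, None)
    first = series[0]
    metric = first.get("metric") if isinstance(first, dict) else {}
    value = first.get("value") if isinstance(first, dict) else []
    node = metric.get("node") if isinstance(metric, dict) else None
    val = value[1] if isinstance(value, list) and len(value) > 1 else None
    return (node, val)


print(primary_series(sample))  # ('node-a', '87.3')
print(primary_series(None))    # (None, None)

structured_answer below calls this twice, once on the unscoped query and once on the scoped one, so it can append the "Overall hottest node" note when the two winners differ.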
@@ -677,7 +712,15 @@ def structured_answer(prompt: str, *, inventory: list[dict[str, Any]], metrics_s
             scope_parts.append("worker")
         if scope_parts:
             scope = " ".join(scope_parts)
-            return f"Among {scope} nodes, {answer}"
+            overall_note = ""
+            base_res = vm_query(entry["exprs"][0], timeout=20)
+            base_node, base_val = _primary_series_metric(base_res)
+            scoped_node, scoped_val = _primary_series_metric(res)
+            if base_node and scoped_node and base_node != scoped_node:
+                percent = _metric_expr_uses_percent(entry)
+                base_val_fmt = _format_metric_value(base_val or "", percent=percent)
+                overall_note = f" Overall hottest node: {base_node} ({base_val_fmt})."
+            return f"Among {scope} nodes, {answer}{overall_note}"
         return answer
     if metrics_summary:
         return metrics_summary
@@ -1075,7 +1118,7 @@ def _context_fallback(context: str) -> str:
     trimmed = context.strip()
     if len(trimmed) > MAX_TOOL_CHARS:
         trimmed = trimmed[: MAX_TOOL_CHARS - 3].rstrip() + "..."
-    return "I couldn’t reach the model backend. Here is the data I found:\n" + trimmed
+    return "Here is what I found:\n" + trimmed
 
 def vm_top_restarts(hours: int = 1) -> str:
     q = f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{hours}h])))"
@@ -1192,6 +1235,11 @@ class _AtlasbotHandler(BaseHTTPRequestHandler):
             return
         inventory = node_inventory_live()
         answer = structured_answer(prompt, inventory=inventory, metrics_summary="")
+        if not answer and _knowledge_intent(prompt):
+            answer = knowledge_summary(prompt, inventory)
+        if not answer:
+            kb = kb_retrieve_titles(prompt, limit=4)
+            answer = kb or ""
         self._write_json(200, {"answer": answer})
 
 
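Not part of the commit, just an illustrative sketch: the handler now resolves an answer in three stages, structured metric/inventory answers first, then the new knowledge summary when the prompt looks like a knowledge question, then runbook titles as a last resort. The precedence is written below against stand-in callables so it can be checked in isolation; the real handler wires in structured_answer, _knowledge_intent, knowledge_summary, and kb_retrieve_titles exactly as shown in the hunk above.

from typing import Callable


def resolve_answer(
    prompt: str,
    structured: Callable[[str], str],
    is_knowledge: Callable[[str], bool],
    knowledge: Callable[[str], str],
    kb_titles: Callable[[str], str],
) -> str:
    # 1) structured metric/inventory answers win outright
    answer = structured(prompt)
    # 2) knowledge-style questions fall back to inventory + runbook summary
    if not answer and is_knowledge(prompt):
        answer = knowledge(prompt)
    # 3) last resort: at least point at relevant runbook titles
    if not answer:
        answer = kb_titles(prompt) or ""
    return answer


# Quick precedence check with toy callables.
assert resolve_answer("q", lambda p: "metric", lambda p: True, lambda p: "summary", lambda p: "titles") == "metric"
assert resolve_answer("q", lambda p: "", lambda p: True, lambda p: "summary", lambda p: "titles") == "summary"
assert resolve_answer("q", lambda p: "", lambda p: False, lambda p: "summary", lambda p: "titles") == "titles"
assert resolve_answer("q", lambda p: "", lambda p: False, lambda p: "summary", lambda p: "") == ""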
@@ -1257,6 +1305,48 @@ def build_context(
 
     return "\n\n".join([p for p in parts if p]).strip()
 
+
+def _knowledge_intent(prompt: str) -> bool:
+    q = normalize_query(prompt)
+    return any(
+        phrase in q
+        for phrase in (
+            "what do you know",
+            "tell me about",
+            "overview",
+            "summary",
+            "describe",
+            "explain",
+            "what is",
+        )
+    )
+
+
+def _inventory_summary(inventory: list[dict[str, Any]]) -> str:
+    if not inventory:
+        return ""
+    groups = _group_nodes(inventory)
+    total = len(inventory)
+    ready = [n for n in inventory if n.get("ready") is True]
+    not_ready = [n for n in inventory if n.get("ready") is False]
+    parts = [f"Atlas cluster: {total} nodes ({len(ready)} ready, {len(not_ready)} not ready)."]
+    for key in ("rpi5", "rpi4", "jetson", "amd64", "arm64-unknown", "unknown"):
+        nodes = groups.get(key) or []
+        if nodes:
+            parts.append(f"- {key}: {len(nodes)} nodes ({', '.join(nodes)})")
+    return "\n".join(parts)
+
+
+def knowledge_summary(prompt: str, inventory: list[dict[str, Any]]) -> str:
+    parts: list[str] = []
+    inv = _inventory_summary(inventory)
+    if inv:
+        parts.append(inv)
+    kb_titles = kb_retrieve_titles(prompt, limit=4)
+    if kb_titles:
+        parts.append(kb_titles)
+    return "\n".join(parts).strip()
+
 def _ollama_call(hist_key, prompt: str, *, context: str) -> str:
     system = (
         "System: You are Atlas, the Titan lab assistant for Atlas/Othrys. "
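Not part of the commit, just an illustrative sketch of roughly what these helpers stitch together: the _inventory_summary header plus per-hardware-group lines, with kb_retrieve_titles output appended underneath by knowledge_summary. The toy version below uses an inline grouping step in place of _group_nodes; the node names and the "hw" field are invented for the example and need not match what node_inventory_live() returns.

from collections import defaultdict
from typing import Any

# Invented two-node inventory for demonstration only.
toy_inventory: list[dict[str, Any]] = [
    {"name": "node-a", "ready": True, "hw": "rpi5"},
    {"name": "node-b", "ready": False, "hw": "jetson"},
]


def toy_inventory_summary(inventory: list[dict[str, Any]]) -> str:
    if not inventory:
        return ""
    ready = [n for n in inventory if n.get("ready") is True]
    not_ready = [n for n in inventory if n.get("ready") is False]
    parts = [f"Atlas cluster: {len(inventory)} nodes ({len(ready)} ready, {len(not_ready)} not ready)."]
    groups: dict[str, list[str]] = defaultdict(list)
    for n in inventory:
        groups[n.get("hw") or "unknown"].append(n["name"])
    for key in ("rpi5", "rpi4", "jetson", "amd64", "arm64-unknown", "unknown"):
        if groups.get(key):
            parts.append(f"- {key}: {len(groups[key])} nodes ({', '.join(groups[key])})")
    return "\n".join(parts)


print(toy_inventory_summary(toy_inventory))
# Atlas cluster: 2 nodes (1 ready, 1 not ready).
# - rpi5: 1 nodes (node-a)
# - jetson: 1 nodes (node-b)

knowledge_summary then appends kb_retrieve_titles(prompt, limit=4) below this, so even a vague "tell me about the cluster" prompt gets both the hardware breakdown and pointers into the runbooks.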
@@ -1416,6 +1506,12 @@ def sync_loop(token: str, room_id: str):
                 send_msg(token, rid, structured)
                 continue
 
+            if _knowledge_intent(body):
+                summary = knowledge_summary(body, inventory)
+                if summary:
+                    send_msg(token, rid, summary)
+                    continue
+
             reply = ollama_reply_with_thinking(
                 token,
                 rid,