atlasbot: make cluster answers more narrative

This commit is contained in:
Brad Stein 2026-01-27 18:08:19 -03:00
parent 1b04e6cb00
commit e87fa4369c

View File

@ -181,6 +181,27 @@ CLUSTER_HINT_WORDS = {
"arm64",
}
# Keywords that mark a prompt as asking for an "insight" about the cluster.
# `_is_insight_query` returns True when the normalized query mentions any of
# them. "odd", "weird" and "unconventional" are also listed in that function's
# secondary "most ..." check.
_INSIGHT_HINT_WORDS = {
"interesting",
"unconventional",
"surprising",
"weird",
"odd",
"fun",
"cool",
"unique",
"notable",
}
# Words and multi-word phrases that mark a prompt as asking for a broad
# overview. `_is_overview_query` tests each entry as a substring of the
# normalized query, which is why phrases containing spaces work here.
_OVERVIEW_HINT_WORDS = {
"overview",
"summary",
"describe",
"explain",
"tell me about",
"what do you know",
}
_OLLAMA_LOCK = threading.Lock()
HARDWARE_HINTS = {
@ -1408,7 +1429,18 @@ def _nodes_summary_line(inventory: list[dict[str, Any]], snapshot: dict[str, Any
not_ready = len([n for n in inventory if n.get("ready") is False])
if total is None:
return ""
return f"Atlas cluster has {total} nodes ({ready} ready, {not_ready} not ready)."
if not_ready:
names = []
summary_names = summary.get("not_ready_names") if isinstance(summary, dict) else []
if isinstance(summary_names, list):
names = [name for name in summary_names if isinstance(name, str)]
if not names and snapshot:
details = snapshot.get("nodes_detail") if isinstance(snapshot.get("nodes_detail"), list) else []
names = [node.get("name") for node in details if isinstance(node, dict) and node.get("ready") is False]
names = [name for name in names if isinstance(name, str) and name]
suffix = f" (not ready: {', '.join(names)})" if names else ""
return f"Atlas has {total} nodes; {ready} ready, {not_ready} not ready{suffix}."
return f"Atlas has {total} nodes and all are Ready."
def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
@ -1422,7 +1454,7 @@ def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
parts.append(f"{key}={len(nodes)}")
if not parts:
return ""
return "Hardware mix: " + ", ".join(parts) + "."
return "Hardware mix includes " + ", ".join(parts) + "."
def _os_mix_line(snapshot: dict[str, Any] | None) -> str:
@ -1449,6 +1481,8 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
pending = metrics.get("pods_pending")
failed = metrics.get("pods_failed")
succeeded = metrics.get("pods_succeeded")
if running is None and pending is None and failed is None and succeeded is None:
return ""
parts: list[str] = []
if running is not None:
parts.append(f"{running:.0f} running")
@ -1458,9 +1492,7 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
parts.append(f"{failed:.0f} failed")
if succeeded is not None:
parts.append(f"{succeeded:.0f} succeeded")
if not parts:
return ""
return "Pods: " + ", ".join(parts) + "."
return "There are " + ", ".join(parts) + " pods."
def _postgres_summary_line(metrics: dict[str, Any]) -> str:
@ -1481,7 +1513,7 @@ def _postgres_summary_line(metrics: dict[str, Any]) -> str:
parts.append(f"hottest {hottest.get('label')} ({hot_val_str})")
if not parts:
return ""
return "Postgres: " + ", ".join(parts) + "."
return "Postgres is at " + ", ".join(parts) + "."
def _hottest_summary_line(metrics: dict[str, Any]) -> str:
@ -1504,7 +1536,101 @@ def _hottest_summary_line(metrics: dict[str, Any]) -> str:
parts.append(f"{key.upper()} {node} ({value_fmt})")
if not parts:
return ""
return "Hottest nodes: " + "; ".join(parts) + "."
return "Hot spots: " + "; ".join(parts) + "."
def _is_insight_query(query: str) -> bool:
    """Return True when *query* asks for something notable about the cluster.

    A query qualifies when it contains one of ``_INSIGHT_HINT_WORDS`` as a
    whole word, or the word "most" together with one of "unusual", "odd",
    "weird" or "unconventional".

    Hints are matched on word boundaries rather than as raw substrings, so
    short hints no longer fire inside unrelated words ("fun" in "function",
    "cool" in "cooling", "most" in "almost").  The common "-est" / "-ly"
    forms ("weirdest", "surprisingly") are still accepted.  Other derived
    forms (e.g. "oddities") are intentionally not matched.

    Args:
        query: Raw user prompt.

    Returns:
        True if the prompt looks like an insight request, otherwise False
        (including when the normalized query is empty).
    """
    # Function-scope import: the module's import block is not part of this
    # definition, and the regex cache makes repeated compilation cheap.
    import re

    q = normalize_query(query)
    if not q:
        return False
    # NOTE(review): the hints are lower-case; this assumes normalize_query
    # lower-cases its input - confirm against its definition.

    def mentions(words, *, inflected: bool) -> bool:
        """Report whether any of *words* occurs in ``q`` as a whole word."""
        alternatives = "|".join(re.escape(word) for word in sorted(words))
        if not alternatives:
            # An empty alternation would match at every word boundary.
            return False
        suffix = "(?:est|ly)?" if inflected else ""
        return re.search(rf"\b(?:{alternatives}){suffix}\b", q) is not None

    if mentions(_INSIGHT_HINT_WORDS, inflected=True):
        return True
    return mentions(("most",), inflected=False) and mentions(
        ("unusual", "odd", "weird", "unconventional"),
        inflected=True,
    )
def _is_overview_query(query: str) -> bool:
    """Return True when the normalized *query* contains an overview hint.

    Each entry of ``_OVERVIEW_HINT_WORDS`` is tested as a substring of the
    normalized query; an empty normalized query yields False.
    """
    normalized = normalize_query(query)
    if normalized:
        for hint in _OVERVIEW_HINT_WORDS:
            if hint in normalized:
                return True
    return False
def _doc_intent(query: str) -> bool:
    """Return True when *query* appears to ask for documentation.

    The normalized query is checked for any of a fixed list of phrases
    (substring match).  An empty normalized query yields False.
    """
    doc_phrases = (
        "runbook",
        "documentation",
        "docs",
        "guide",
        "how do i",
        "how to",
        "instructions",
        "playbook",
    )
    normalized = normalize_query(query)
    return bool(normalized) and any(phrase in normalized for phrase in doc_phrases)
def _insight_candidates(
    inventory: list[dict[str, Any]],
    snapshot: dict[str, Any] | None,
) -> list[tuple[str, str, str]]:
    """Collect candidate insight sentences about the cluster.

    Each candidate is a ``(key, sentence, confidence)`` tuple.  Candidates
    are appended in a fixed order: availability (only when the node summary
    mentions "not ready"), cpu, ram, postgres, hardware, pods.  A candidate
    is skipped when its source data or summary line is missing or empty.

    Args:
        inventory: Node inventory passed through to the summary helpers.
        snapshot: Cluster snapshot, or None.

    Returns:
        The list of candidates, possibly empty.
    """
    metrics = _snapshot_metrics(snapshot)
    found: list[tuple[str, str, str]] = []

    availability = _nodes_summary_line(inventory, snapshot)
    if availability and "not ready" in availability.lower():
        found.append(("availability", availability, "high"))

    raw_hottest = metrics.get("hottest_nodes")
    hottest = raw_hottest if isinstance(raw_hottest, dict) else {}

    def peak(kind: str):
        """Return ``(node, formatted value)`` for *kind*, or None if absent."""
        entry = hottest.get(kind)
        if not isinstance(entry, dict):
            return None
        node = entry.get("node")
        value = entry.get("value")
        if not node or value is None:
            return None
        return node, _format_metric_value(str(value), percent=True)

    cpu_peak = peak("cpu")
    if cpu_peak is not None:
        node, shown = cpu_peak
        found.append(("cpu", f"The busiest CPU right now is {node} at about {shown}.", "high"))

    ram_peak = peak("ram")
    if ram_peak is not None:
        node, shown = ram_peak
        found.append(("ram", f"RAM usage peaks on {node} at about {shown}.", "high"))

    # Remaining candidates reuse the existing one-line summaries verbatim.
    summary_candidates = (
        ("postgres", _postgres_summary_line(metrics), "high"),
        ("hardware", _hardware_mix_line(inventory), "medium"),
        ("pods", _pods_summary_line(metrics), "high"),
    )
    for key, line, confidence in summary_candidates:
        if line:
            found.append((key, line, confidence))
    return found
def _select_insight(
prompt: str,
candidates: list[tuple[str, str, str]],
) -> tuple[str, str] | None:
if not candidates:
return None
q = normalize_query(prompt)
prefer_keys: list[str] = []
if any(word in q for word in ("unconventional", "weird", "odd", "unique", "surprising")):
prefer_keys.extend(["hardware", "availability"])
if any(word in q for word in ("another", "else", "different", "other")) and len(candidates) > 1:
return candidates[1][1], candidates[1][2]
if prefer_keys:
for key, text, conf in candidates:
if key in prefer_keys:
return text, conf
key, text, conf = candidates[0]
return text, conf
def cluster_overview_answer(
@ -1517,31 +1643,21 @@ def cluster_overview_answer(
return ""
q = normalize_query(prompt)
metrics = _snapshot_metrics(snapshot)
lines: list[str] = []
sentences: list[str] = []
nodes_line = _nodes_summary_line(inventory, snapshot)
if nodes_line:
lines.append(nodes_line)
sentences.append(nodes_line)
if any(word in q for word in ("hardware", "architecture", "nodes", "node", "cluster", "atlas", "titan", "lab")):
hw_line = _hardware_mix_line(inventory)
if hw_line:
lines.append(hw_line)
os_line = _os_mix_line(snapshot)
if os_line:
lines.append(os_line)
if any(
wants_overview = _is_overview_query(q) or any(word in q for word in ("atlas", "cluster", "titan", "lab"))
wants_hardware = any(word in q for word in ("hardware", "architecture", "nodes", "node")) or wants_overview
wants_metrics = any(
word in q
for word in (
"interesting",
"status",
"health",
"overview",
"summary",
"tell me",
"what do you know",
"about",
"pods",
"postgres",
"connections",
@ -1558,20 +1674,32 @@ def cluster_overview_answer(
"usage",
"utilization",
)
):
) or wants_overview
if wants_hardware:
hw_line = _hardware_mix_line(inventory)
if hw_line:
sentences.append(hw_line)
os_line = _os_mix_line(snapshot)
if os_line:
sentences.append(os_line)
if wants_metrics:
pods_line = _pods_summary_line(metrics)
if pods_line:
lines.append(pods_line)
hottest_line = _hottest_summary_line(metrics)
if hottest_line:
lines.append(hottest_line)
sentences.append(pods_line)
postgres_line = _postgres_summary_line(metrics)
if postgres_line:
lines.append(postgres_line)
sentences.append(postgres_line)
hottest_line = _hottest_summary_line(metrics)
if hottest_line:
sentences.append(hottest_line)
if not lines:
if not sentences:
return ""
return "Based on the snapshot, " + "\n".join(lines)
if len(sentences) > 3 and not wants_overview:
sentences = sentences[:3]
return "Based on the latest snapshot, " + " ".join(sentences)
def cluster_answer(
@ -1582,6 +1710,12 @@ def cluster_answer(
workloads: list[dict[str, Any]] | None,
) -> str:
metrics_summary = snapshot_context(prompt, snapshot)
if _is_insight_query(prompt):
candidates = _insight_candidates(inventory, snapshot)
selected = _select_insight(prompt, candidates)
if selected:
text, confidence = selected
return _format_confidence(text, confidence)
structured = structured_answer(
prompt,
inventory=inventory,
@ -1602,7 +1736,7 @@ def cluster_answer(
overview = cluster_overview_answer(prompt, inventory=inventory, snapshot=snapshot)
if overview:
kb_titles = kb_retrieve_titles(prompt, limit=4) if _knowledge_intent(prompt) else ""
kb_titles = kb_retrieve_titles(prompt, limit=4) if _doc_intent(prompt) else ""
if kb_titles:
overview = overview + "\n" + kb_titles
return _format_confidence(overview, "medium")