atlasbot: make cluster answers more narrative

2026-01-27 18:08:19 -03:00 · 2026-01-27 18:08:19 -03:00 · e87fa4369c
commit e87fa4369c
parent 1b04e6cb00
1 changed files with 165 additions and 31 deletions
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@ -181,6 +181,27 @@ CLUSTER_HINT_WORDS = {
    "arm64",
 }

+# Hint words that make _is_insight_query() return True.
+# Each entry is tested with `in` against the normalized query, so (assuming
+# that query is a plain string) short entries such as "fun" or "odd" also
+# match when they appear inside a longer word.
+_INSIGHT_HINT_WORDS = {
+    "interesting",
+    "unconventional",
+    "surprising",
+    "weird",
+    "odd",
+    "fun",
+    "cool",
+    "unique",
+    "notable",
+}
+
+# Words and multi-word phrases that make _is_overview_query() return True.
+# Entries are tested with `in` against the normalized query, so phrases such
+# as "tell me about" only match if normalization keeps the words adjacent and
+# single-space separated -- TODO confirm against normalize_query().
+_OVERVIEW_HINT_WORDS = {
+    "overview",
+    "summary",
+    "describe",
+    "explain",
+    "tell me about",
+    "what do you know",
+}
+
 _OLLAMA_LOCK = threading.Lock()

 HARDWARE_HINTS = {
@ -1408,7 +1429,18 @@ def _nodes_summary_line(inventory: list[dict[str, Any]], snapshot: dict[str, Any
        not_ready = len([n for n in inventory if n.get("ready") is False])
    if total is None:
        return ""
-    return f"Atlas cluster has {total} nodes ({ready} ready, {not_ready} not ready)."
+    if not_ready:
+        names = []
+        summary_names = summary.get("not_ready_names") if isinstance(summary, dict) else []
+        if isinstance(summary_names, list):
+            names = [name for name in summary_names if isinstance(name, str)]
+        if not names and snapshot:
+            details = snapshot.get("nodes_detail") if isinstance(snapshot.get("nodes_detail"), list) else []
+            names = [node.get("name") for node in details if isinstance(node, dict) and node.get("ready") is False]
+        names = [name for name in names if isinstance(name, str) and name]
+        suffix = f" (not ready: {', '.join(names)})" if names else ""
+        return f"Atlas has {total} nodes; {ready} ready, {not_ready} not ready{suffix}."
+    return f"Atlas has {total} nodes and all are Ready."


 def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
@ -1422,7 +1454,7 @@ def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
            parts.append(f"{key}={len(nodes)}")
    if not parts:
        return ""
-    return "Hardware mix: " + ", ".join(parts) + "."
+    return "Hardware mix includes " + ", ".join(parts) + "."


 def _os_mix_line(snapshot: dict[str, Any] | None) -> str:
@ -1449,6 +1481,8 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
    pending = metrics.get("pods_pending")
    failed = metrics.get("pods_failed")
    succeeded = metrics.get("pods_succeeded")
+    if running is None and pending is None and failed is None and succeeded is None:
+        return ""
    parts: list[str] = []
    if running is not None:
        parts.append(f"{running:.0f} running")
@ -1458,9 +1492,7 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
        parts.append(f"{failed:.0f} failed")
    if succeeded is not None:
        parts.append(f"{succeeded:.0f} succeeded")
-    if not parts:
-        return ""
-    return "Pods: " + ", ".join(parts) + "."
+    return "There are " + ", ".join(parts) + " pods."


 def _postgres_summary_line(metrics: dict[str, Any]) -> str:
@ -1481,7 +1513,7 @@ def _postgres_summary_line(metrics: dict[str, Any]) -> str:
        parts.append(f"hottest {hottest.get('label')} ({hot_val_str})")
    if not parts:
        return ""
-    return "Postgres: " + ", ".join(parts) + "."
+    return "Postgres is at " + ", ".join(parts) + "."


 def _hottest_summary_line(metrics: dict[str, Any]) -> str:
@ -1504,7 +1536,101 @@ def _hottest_summary_line(metrics: dict[str, Any]) -> str:
            parts.append(f"{key.upper()} {node} ({value_fmt})")
    if not parts:
        return ""
-    return "Hottest nodes: " + "; ".join(parts) + "."
+    return "Hot spots: " + "; ".join(parts) + "."
+
+
+def _is_insight_query(query: str) -> bool:
+    """Report whether the query asks for a notable or unusual fact.
+
+    The query is passed through normalize_query() first; an empty result is
+    never an insight query. A match is either any _INSIGHT_HINT_WORDS entry
+    occurring in the normalized query, or "most" together with one of
+    "unusual", "odd", "weird" or "unconventional".
+
+    NOTE(review): matching uses `in`, so if the normalized query is a string,
+    hints also match inside longer words (e.g. "fun" within "function").
+    """
+    normalized = normalize_query(query)
+    if not normalized:
+        return False
+    for hint in _INSIGHT_HINT_WORDS:
+        if hint in normalized:
+            return True
+    # Superlative phrasing: "most" only counts alongside one of these terms.
+    if "most" not in normalized:
+        return False
+    return any(term in normalized for term in ("unusual", "odd", "weird", "unconventional"))
+
+
+def _is_overview_query(query: str) -> bool:
+    """Report whether the query contains any _OVERVIEW_HINT_WORDS entry.
+
+    The query is normalized with normalize_query() before matching; a query
+    that normalizes to an empty value yields False.
+    """
+    normalized = normalize_query(query)
+    if normalized:
+        return any(hint in normalized for hint in _OVERVIEW_HINT_WORDS)
+    return False
+
+
+def _doc_intent(query: str) -> bool:
+    """Report whether the query mentions documentation-style phrases.
+
+    Returns False when the query normalizes to an empty value; otherwise True
+    as soon as one of the phrases below occurs in the normalized query.
+    """
+    normalized = normalize_query(query)
+    if not normalized:
+        return False
+    doc_phrases = (
+        "runbook",
+        "documentation",
+        "docs",
+        "guide",
+        "how do i",
+        "how to",
+        "instructions",
+        "playbook",
+    )
+    for phrase in doc_phrases:
+        if phrase in normalized:
+            return True
+    return False
+
+
+def _insight_candidates(
+    inventory: list[dict[str, Any]],
+    snapshot: dict[str, Any] | None,
+) -> list[tuple[str, str, str]]:
+    """Collect (key, sentence, confidence) facts that could answer an insight query.
+
+    Candidates are appended in a fixed order -- availability, cpu, ram,
+    postgres, hardware, pods -- and each is included only when its source
+    data produces a non-empty sentence.
+    """
+    metrics = _snapshot_metrics(snapshot)
+    found: list[tuple[str, str, str]] = []
+
+    node_sentence = _nodes_summary_line(inventory, snapshot)
+    # The node summary only qualifies when it mentions "not ready".
+    if node_sentence and "not ready" in node_sentence.lower():
+        found.append(("availability", node_sentence, "high"))
+
+    hot_nodes = metrics.get("hottest_nodes")
+    if not isinstance(hot_nodes, dict):
+        hot_nodes = {}
+
+    def _peak(kind):
+        # Return (node, formatted value) for one hottest-node entry, or None
+        # when the entry is missing, not a dict, or lacks a node/value.
+        entry = hot_nodes.get(kind)
+        if not isinstance(entry, dict):
+            return None
+        node = entry.get("node")
+        value = entry.get("value")
+        if not node or value is None:
+            return None
+        return node, _format_metric_value(str(value), percent=True)
+
+    cpu_peak = _peak("cpu")
+    if cpu_peak is not None:
+        node, value_fmt = cpu_peak
+        found.append(("cpu", f"The busiest CPU right now is {node} at about {value_fmt}.", "high"))
+
+    ram_peak = _peak("ram")
+    if ram_peak is not None:
+        node, value_fmt = ram_peak
+        found.append(("ram", f"RAM usage peaks on {node} at about {value_fmt}.", "high"))
+
+    # Remaining summaries reuse the one-line helpers; empty lines are dropped.
+    for key, sentence, confidence in (
+        ("postgres", _postgres_summary_line(metrics), "high"),
+        ("hardware", _hardware_mix_line(inventory), "medium"),
+        ("pods", _pods_summary_line(metrics), "high"),
+    ):
+        if sentence:
+            found.append((key, sentence, confidence))
+
+    return found
+
+
+def _select_insight(
+    prompt: str,
+    candidates: list[tuple[str, str, str]],
+) -> tuple[str, str] | None:
+    """Pick one (sentence, confidence) pair from the candidates for this prompt.
+
+    Returns None when there are no candidates. Selection order:
+    1. If the prompt contains "another", "else", "different" or "other" and
+       there are at least two candidates, the second candidate is returned.
+    2. If the prompt contains "unconventional", "weird", "odd", "unique" or
+       "surprising", the first candidate keyed "hardware" or "availability"
+       (in candidate order) is returned.
+    3. Otherwise the first candidate is returned.
+    """
+    if not candidates:
+        return None
+    q = normalize_query(prompt)
+
+    wants_alternative = any(word in q for word in ("another", "else", "different", "other"))
+    if wants_alternative and len(candidates) > 1:
+        runner_up = candidates[1]
+        return runner_up[1], runner_up[2]
+
+    wants_unusual = any(
+        word in q for word in ("unconventional", "weird", "odd", "unique", "surprising")
+    )
+    if wants_unusual:
+        preferred = ("hardware", "availability")
+        for key, text, conf in candidates:
+            if key in preferred:
+                return text, conf
+
+    # Fall back to the first candidate when nothing more specific applies.
+    _, text, conf = candidates[0]
+    return text, conf


 def cluster_overview_answer(
@ -1517,31 +1643,21 @@ def cluster_overview_answer(
        return ""
    q = normalize_query(prompt)
    metrics = _snapshot_metrics(snapshot)
-    lines: list[str] = []
+    sentences: list[str] = []

    nodes_line = _nodes_summary_line(inventory, snapshot)
    if nodes_line:
-        lines.append(nodes_line)
+        sentences.append(nodes_line)

-    if any(word in q for word in ("hardware", "architecture", "nodes", "node", "cluster", "atlas", "titan", "lab")):
-        hw_line = _hardware_mix_line(inventory)
-        if hw_line:
-            lines.append(hw_line)
-        os_line = _os_mix_line(snapshot)
-        if os_line:
-            lines.append(os_line)
-
-    if any(
+    wants_overview = _is_overview_query(q) or any(word in q for word in ("atlas", "cluster", "titan", "lab"))
+    wants_hardware = any(word in q for word in ("hardware", "architecture", "nodes", "node")) or wants_overview
+    wants_metrics = any(
        word in q
        for word in (
-            "interesting",
            "status",
            "health",
            "overview",
            "summary",
-            "tell me",
-            "what do you know",
-            "about",
            "pods",
            "postgres",
            "connections",
@ -1558,20 +1674,32 @@ def cluster_overview_answer(
            "usage",
            "utilization",
        )
-    ):
+    ) or wants_overview
+
+    if wants_hardware:
+        hw_line = _hardware_mix_line(inventory)
+        if hw_line:
+            sentences.append(hw_line)
+        os_line = _os_mix_line(snapshot)
+        if os_line:
+            sentences.append(os_line)
+
+    if wants_metrics:
        pods_line = _pods_summary_line(metrics)
        if pods_line:
-            lines.append(pods_line)
-        hottest_line = _hottest_summary_line(metrics)
-        if hottest_line:
-            lines.append(hottest_line)
+            sentences.append(pods_line)
        postgres_line = _postgres_summary_line(metrics)
        if postgres_line:
-            lines.append(postgres_line)
+            sentences.append(postgres_line)
+        hottest_line = _hottest_summary_line(metrics)
+        if hottest_line:
+            sentences.append(hottest_line)

-    if not lines:
+    if not sentences:
        return ""
-    return "Based on the snapshot, " + "\n".join(lines)
+    if len(sentences) > 3 and not wants_overview:
+        sentences = sentences[:3]
+    return "Based on the latest snapshot, " + " ".join(sentences)


 def cluster_answer(
@ -1582,6 +1710,12 @@ def cluster_answer(
    workloads: list[dict[str, Any]] | None,
 ) -> str:
    metrics_summary = snapshot_context(prompt, snapshot)
+    if _is_insight_query(prompt):
+        candidates = _insight_candidates(inventory, snapshot)
+        selected = _select_insight(prompt, candidates)
+        if selected:
+            text, confidence = selected
+            return _format_confidence(text, confidence)
    structured = structured_answer(
        prompt,
        inventory=inventory,
@ -1602,7 +1736,7 @@ def cluster_answer(

    overview = cluster_overview_answer(prompt, inventory=inventory, snapshot=snapshot)
    if overview:
-        kb_titles = kb_retrieve_titles(prompt, limit=4) if _knowledge_intent(prompt) else ""
+        kb_titles = kb_retrieve_titles(prompt, limit=4) if _doc_intent(prompt) else ""
        if kb_titles:
            overview = overview + "\n" + kb_titles
        return _format_confidence(overview, "medium")