From e87fa4369ced5b155e5fbfa940cf8f0fa22497f5 Mon Sep 17 00:00:00 2001
From: Brad Stein <Brad.Stein@gmail.com>
Date: Tue, 27 Jan 2026 18:08:19 -0300
Subject: [PATCH] atlasbot: make cluster answers more narrative

---
 services/comms/scripts/atlasbot/bot.py | 196 +++++++++++++++++++++----
 1 file changed, 165 insertions(+), 31 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index d36844b..0dcfc60 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -181,6 +181,27 @@ CLUSTER_HINT_WORDS = {
     "arm64",
 }
 
+_INSIGHT_HINT_WORDS = {  # hints matched with `in` against the normalized query by _is_insight_query
+    "interesting",
+    "unconventional",
+    "surprising",
+    "weird",
+    "odd",
+    "fun",
+    "cool",
+    "unique",
+    "notable",
+}
+
+_OVERVIEW_HINT_WORDS = {  # words/phrases matched with `in` against the normalized query by _is_overview_query
+    "overview",
+    "summary",
+    "describe",
+    "explain",
+    "tell me about",
+    "what do you know",
+}
+
 _OLLAMA_LOCK = threading.Lock()
 
 HARDWARE_HINTS = {
@@ -1408,7 +1429,18 @@ def _nodes_summary_line(inventory: list[dict[str, Any]], snapshot: dict[str, Any
         not_ready = len([n for n in inventory if n.get("ready") is False])
     if total is None:
         return ""
-    return f"Atlas cluster has {total} nodes ({ready} ready, {not_ready} not ready)."
+    if not_ready:
+        names = []
+        summary_names = summary.get("not_ready_names") if isinstance(summary, dict) else []
+        if isinstance(summary_names, list):
+            names = [name for name in summary_names if isinstance(name, str)]
+        if not names and snapshot:
+            details = snapshot.get("nodes_detail") if isinstance(snapshot.get("nodes_detail"), list) else []
+            names = [node.get("name") for node in details if isinstance(node, dict) and node.get("ready") is False]
+        names = [name for name in names if isinstance(name, str) and name]
+        suffix = f" (not ready: {', '.join(names)})" if names else ""
+        return f"Atlas has {total} nodes; {ready} ready, {not_ready} not ready{suffix}."
+    return f"Atlas has {total} nodes and all are Ready."
 
 
 def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
@@ -1422,7 +1454,7 @@ def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str:
             parts.append(f"{key}={len(nodes)}")
     if not parts:
         return ""
-    return "Hardware mix: " + ", ".join(parts) + "."
+    return "Hardware mix includes " + ", ".join(parts) + "."
 
 
 def _os_mix_line(snapshot: dict[str, Any] | None) -> str:
@@ -1449,6 +1481,8 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
     pending = metrics.get("pods_pending")
     failed = metrics.get("pods_failed")
     succeeded = metrics.get("pods_succeeded")
+    if running is None and pending is None and failed is None and succeeded is None:
+        return ""
     parts: list[str] = []
     if running is not None:
         parts.append(f"{running:.0f} running")
@@ -1458,9 +1492,7 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str:
         parts.append(f"{failed:.0f} failed")
     if succeeded is not None:
         parts.append(f"{succeeded:.0f} succeeded")
-    if not parts:
-        return ""
-    return "Pods: " + ", ".join(parts) + "."
+    return "There are " + ", ".join(parts) + " pods."
 
 
 def _postgres_summary_line(metrics: dict[str, Any]) -> str:
@@ -1481,7 +1513,7 @@ def _postgres_summary_line(metrics: dict[str, Any]) -> str:
         parts.append(f"hottest {hottest.get('label')} ({hot_val_str})")
     if not parts:
         return ""
-    return "Postgres: " + ", ".join(parts) + "."
+    return "Postgres is at " + ", ".join(parts) + "."
 
 
 def _hottest_summary_line(metrics: dict[str, Any]) -> str:
@@ -1504,7 +1536,101 @@ def _hottest_summary_line(metrics: dict[str, Any]) -> str:
             parts.append(f"{key.upper()} {node} ({value_fmt})")
     if not parts:
         return ""
-    return "Hottest nodes: " + "; ".join(parts) + "."
+    return "Hot spots: " + "; ".join(parts) + "."
+
+
+def _is_insight_query(query: str) -> bool:
+    """Return True when the normalized query has an insight hint, or "most" plus an oddity word."""
+    text = normalize_query(query)
+    if not text:
+        return False
+    # NOTE(review): hints are matched with `in`; if text is a str, "fun" also fires inside "function".
+    has_hint = any(hint in text for hint in _INSIGHT_HINT_WORDS)
+    oddity = ("unusual", "odd", "weird", "unconventional")
+    return has_hint or ("most" in text and any(term in text for term in oddity))
+
+
+def _is_overview_query(query: str) -> bool:
+    """Return True when the normalized query contains any overview hint word or phrase."""
+    text = normalize_query(query)
+    # An empty normalized query never counts as an overview request.
+    return bool(text) and any(hint in text for hint in _OVERVIEW_HINT_WORDS)
+
+
+def _doc_intent(query: str) -> bool:
+    """Return True when the normalized query contains a documentation-seeking phrase."""
+    doc_phrases = (
+        "runbook",
+        "documentation",
+        "docs",
+        "guide",
+        "how do i",
+        "how to",
+        "instructions",
+        "playbook",
+    )
+    text = normalize_query(query)
+    if text:
+        # Phrases are tested with `in` against the whole normalized query.
+        return any(phrase in text for phrase in doc_phrases)
+    return False
+
+
+def _insight_candidates(
+    inventory: list[dict[str, Any]],
+    snapshot: dict[str, Any] | None,
+) -> list[tuple[str, str, str]]:
+    """Collect (key, sentence, confidence) insight tuples from the inventory and snapshot.
+
+    Order is fixed: availability, cpu, ram, postgres, hardware, pods; empty lines are skipped.
+    """
+    metrics = _snapshot_metrics(snapshot)
+    found: list[tuple[str, str, str]] = []
+
+    # The node summary is only added when its text contains "not ready".
+    node_summary = _nodes_summary_line(inventory, snapshot)
+    if node_summary and "not ready" in node_summary.lower():
+        found.append(("availability", node_summary, "high"))
+
+    raw_hottest = metrics.get("hottest_nodes")
+    hottest = raw_hottest if isinstance(raw_hottest, dict) else {}
+    hot_templates = (
+        ("cpu", "The busiest CPU right now is {node} at about {value}."),
+        ("ram", "RAM usage peaks on {node} at about {value}."),
+    )
+    for key, template in hot_templates:
+        entry = hottest.get(key)
+        entry = entry if isinstance(entry, dict) else {}
+        if entry.get("node") and entry.get("value") is not None:
+            shown = _format_metric_value(str(entry.get("value")), percent=True)
+            found.append((key, template.format(node=entry.get("node"), value=shown), "high"))
+
+    summaries = (
+        ("postgres", _postgres_summary_line(metrics), "high"),
+        ("hardware", _hardware_mix_line(inventory), "medium"),
+        ("pods", _pods_summary_line(metrics), "high"),
+    )
+    return found + [item for item in summaries if item[1]]
+
+
+def _select_insight(
+    prompt: str,
+    candidates: list[tuple[str, str, str]],
+) -> tuple[str, str] | None:
+    """Choose one (text, confidence) pair from (key, text, confidence) candidates; None if empty."""
+    if not candidates:
+        return None
+    q = normalize_query(prompt)
+    wants_odd = any(word in q for word in ("unconventional", "weird", "odd", "unique", "surprising"))
+    wants_next = any(word in q for word in ("another", "else", "different", "other"))
+    # A follow-up style prompt gets the second candidate, ahead of any key preference.
+    if wants_next and len(candidates) > 1:
+        return candidates[1][1], candidates[1][2]
+    # For oddity prompts the first hardware/availability candidate (in candidate order) wins.
+    odd_keys = ("hardware", "availability") if wants_odd else ()
+    preferred = [item for item in candidates if item[0] in odd_keys]
+    _, text, conf = (preferred or candidates)[0]
+    return text, conf
 
 
 def cluster_overview_answer(
@@ -1517,31 +1643,21 @@ def cluster_overview_answer(
         return ""
     q = normalize_query(prompt)
     metrics = _snapshot_metrics(snapshot)
-    lines: list[str] = []
+    sentences: list[str] = []
 
     nodes_line = _nodes_summary_line(inventory, snapshot)
     if nodes_line:
-        lines.append(nodes_line)
+        sentences.append(nodes_line)
 
-    if any(word in q for word in ("hardware", "architecture", "nodes", "node", "cluster", "atlas", "titan", "lab")):
-        hw_line = _hardware_mix_line(inventory)
-        if hw_line:
-            lines.append(hw_line)
-        os_line = _os_mix_line(snapshot)
-        if os_line:
-            lines.append(os_line)
-
-    if any(
+    wants_overview = _is_overview_query(q) or any(word in q for word in ("atlas", "cluster", "titan", "lab"))
+    wants_hardware = any(word in q for word in ("hardware", "architecture", "nodes", "node")) or wants_overview
+    wants_metrics = any(
         word in q
         for word in (
-            "interesting",
             "status",
             "health",
             "overview",
             "summary",
-            "tell me",
-            "what do you know",
-            "about",
             "pods",
             "postgres",
             "connections",
@@ -1558,20 +1674,32 @@ def cluster_overview_answer(
             "usage",
             "utilization",
         )
-    ):
+    ) or wants_overview
+
+    if wants_hardware:
+        hw_line = _hardware_mix_line(inventory)
+        if hw_line:
+            sentences.append(hw_line)
+        os_line = _os_mix_line(snapshot)
+        if os_line:
+            sentences.append(os_line)
+
+    if wants_metrics:
         pods_line = _pods_summary_line(metrics)
         if pods_line:
-            lines.append(pods_line)
-        hottest_line = _hottest_summary_line(metrics)
-        if hottest_line:
-            lines.append(hottest_line)
+            sentences.append(pods_line)
         postgres_line = _postgres_summary_line(metrics)
         if postgres_line:
-            lines.append(postgres_line)
+            sentences.append(postgres_line)
+        hottest_line = _hottest_summary_line(metrics)
+        if hottest_line:
+            sentences.append(hottest_line)
 
-    if not lines:
+    if not sentences:
         return ""
-    return "Based on the snapshot, " + "\n".join(lines)
+    if len(sentences) > 3 and not wants_overview:
+        sentences = sentences[:3]
+    return "Based on the latest snapshot, " + " ".join(sentences)
 
 
 def cluster_answer(
@@ -1582,6 +1710,12 @@ def cluster_answer(
     workloads: list[dict[str, Any]] | None,
 ) -> str:
     metrics_summary = snapshot_context(prompt, snapshot)
+    if _is_insight_query(prompt):
+        candidates = _insight_candidates(inventory, snapshot)
+        selected = _select_insight(prompt, candidates)
+        if selected:
+            text, confidence = selected
+            return _format_confidence(text, confidence)
     structured = structured_answer(
         prompt,
         inventory=inventory,
@@ -1602,7 +1736,7 @@ def cluster_answer(
 
     overview = cluster_overview_answer(prompt, inventory=inventory, snapshot=snapshot)
     if overview:
-        kb_titles = kb_retrieve_titles(prompt, limit=4) if _knowledge_intent(prompt) else ""
+        kb_titles = kb_retrieve_titles(prompt, limit=4) if _doc_intent(prompt) else ""
         if kb_titles:
             overview = overview + "\n" + kb_titles
         return _format_confidence(overview, "medium")