From e87fa4369ced5b155e5fbfa940cf8f0fa22497f5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 27 Jan 2026 18:08:19 -0300 Subject: [PATCH] atlasbot: make cluster answers more narrative --- services/comms/scripts/atlasbot/bot.py | 196 +++++++++++++++++++++---- 1 file changed, 165 insertions(+), 31 deletions(-) diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py index d36844b..0dcfc60 100644 --- a/services/comms/scripts/atlasbot/bot.py +++ b/services/comms/scripts/atlasbot/bot.py @@ -181,6 +181,27 @@ CLUSTER_HINT_WORDS = { "arm64", } +_INSIGHT_HINT_WORDS = { + "interesting", + "unconventional", + "surprising", + "weird", + "odd", + "fun", + "cool", + "unique", + "notable", +} + +_OVERVIEW_HINT_WORDS = { + "overview", + "summary", + "describe", + "explain", + "tell me about", + "what do you know", +} + _OLLAMA_LOCK = threading.Lock() HARDWARE_HINTS = { @@ -1408,7 +1429,18 @@ def _nodes_summary_line(inventory: list[dict[str, Any]], snapshot: dict[str, Any not_ready = len([n for n in inventory if n.get("ready") is False]) if total is None: return "" - return f"Atlas cluster has {total} nodes ({ready} ready, {not_ready} not ready)." + if not_ready: + names = [] + summary_names = summary.get("not_ready_names") if isinstance(summary, dict) else [] + if isinstance(summary_names, list): + names = [name for name in summary_names if isinstance(name, str)] + if not names and snapshot: + details = snapshot.get("nodes_detail") if isinstance(snapshot.get("nodes_detail"), list) else [] + names = [node.get("name") for node in details if isinstance(node, dict) and node.get("ready") is False] + names = [name for name in names if isinstance(name, str) and name] + suffix = f" (not ready: {', '.join(names)})" if names else "" + return f"Atlas has {total} nodes; {ready} ready, {not_ready} not ready{suffix}." + return f"Atlas has {total} nodes and all are Ready." def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str: @@ -1422,7 +1454,7 @@ def _hardware_mix_line(inventory: list[dict[str, Any]]) -> str: parts.append(f"{key}={len(nodes)}") if not parts: return "" - return "Hardware mix: " + ", ".join(parts) + "." + return "Hardware mix includes " + ", ".join(parts) + "." def _os_mix_line(snapshot: dict[str, Any] | None) -> str: @@ -1449,6 +1481,8 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str: pending = metrics.get("pods_pending") failed = metrics.get("pods_failed") succeeded = metrics.get("pods_succeeded") + if running is None and pending is None and failed is None and succeeded is None: + return "" parts: list[str] = [] if running is not None: parts.append(f"{running:.0f} running") @@ -1458,9 +1492,7 @@ def _pods_summary_line(metrics: dict[str, Any]) -> str: parts.append(f"{failed:.0f} failed") if succeeded is not None: parts.append(f"{succeeded:.0f} succeeded") - if not parts: - return "" - return "Pods: " + ", ".join(parts) + "." + return "There are " + ", ".join(parts) + " pods." def _postgres_summary_line(metrics: dict[str, Any]) -> str: @@ -1481,7 +1513,7 @@ def _postgres_summary_line(metrics: dict[str, Any]) -> str: parts.append(f"hottest {hottest.get('label')} ({hot_val_str})") if not parts: return "" - return "Postgres: " + ", ".join(parts) + "." + return "Postgres is at " + ", ".join(parts) + "." def _hottest_summary_line(metrics: dict[str, Any]) -> str: @@ -1504,7 +1536,101 @@ def _hottest_summary_line(metrics: dict[str, Any]) -> str: parts.append(f"{key.upper()} {node} ({value_fmt})") if not parts: return "" - return "Hottest nodes: " + "; ".join(parts) + "." + return "Hot spots: " + "; ".join(parts) + "." + + +def _is_insight_query(query: str) -> bool: + q = normalize_query(query) + if not q: + return False + if any(word in q for word in _INSIGHT_HINT_WORDS): + return True + if "most" in q and any(word in q for word in ("unusual", "odd", "weird", "unconventional")): + return True + return False + + +def _is_overview_query(query: str) -> bool: + q = normalize_query(query) + if not q: + return False + return any(word in q for word in _OVERVIEW_HINT_WORDS) + + +def _doc_intent(query: str) -> bool: + q = normalize_query(query) + if not q: + return False + return any( + phrase in q + for phrase in ( + "runbook", + "documentation", + "docs", + "guide", + "how do i", + "how to", + "instructions", + "playbook", + ) + ) + + +def _insight_candidates( + inventory: list[dict[str, Any]], + snapshot: dict[str, Any] | None, +) -> list[tuple[str, str, str]]: + metrics = _snapshot_metrics(snapshot) + candidates: list[tuple[str, str, str]] = [] + + nodes_line = _nodes_summary_line(inventory, snapshot) + if nodes_line and "not ready" in nodes_line.lower(): + candidates.append(("availability", nodes_line, "high")) + + hottest = metrics.get("hottest_nodes") if isinstance(metrics.get("hottest_nodes"), dict) else {} + if hottest: + cpu = hottest.get("cpu") if isinstance(hottest.get("cpu"), dict) else {} + if cpu.get("node") and cpu.get("value") is not None: + value_fmt = _format_metric_value(str(cpu.get("value")), percent=True) + candidates.append(("cpu", f"The busiest CPU right now is {cpu.get('node')} at about {value_fmt}.", "high")) + ram = hottest.get("ram") if isinstance(hottest.get("ram"), dict) else {} + if ram.get("node") and ram.get("value") is not None: + value_fmt = _format_metric_value(str(ram.get("value")), percent=True) + candidates.append(("ram", f"RAM usage peaks on {ram.get('node')} at about {value_fmt}.", "high")) + + postgres_line = _postgres_summary_line(metrics) + if postgres_line: + candidates.append(("postgres", postgres_line, "high")) + + hardware_line = _hardware_mix_line(inventory) + if hardware_line: + candidates.append(("hardware", hardware_line, "medium")) + + pods_line = _pods_summary_line(metrics) + if pods_line: + candidates.append(("pods", pods_line, "high")) + + return candidates + + +def _select_insight( + prompt: str, + candidates: list[tuple[str, str, str]], +) -> tuple[str, str] | None: + if not candidates: + return None + q = normalize_query(prompt) + prefer_keys: list[str] = [] + if any(word in q for word in ("unconventional", "weird", "odd", "unique", "surprising")): + prefer_keys.extend(["hardware", "availability"]) + if any(word in q for word in ("another", "else", "different", "other")) and len(candidates) > 1: + return candidates[1][1], candidates[1][2] + if prefer_keys: + for key, text, conf in candidates: + if key in prefer_keys: + return text, conf + key, text, conf = candidates[0] + return text, conf def cluster_overview_answer( @@ -1517,31 +1643,21 @@ def cluster_overview_answer( return "" q = normalize_query(prompt) metrics = _snapshot_metrics(snapshot) - lines: list[str] = [] + sentences: list[str] = [] nodes_line = _nodes_summary_line(inventory, snapshot) if nodes_line: - lines.append(nodes_line) + sentences.append(nodes_line) - if any(word in q for word in ("hardware", "architecture", "nodes", "node", "cluster", "atlas", "titan", "lab")): - hw_line = _hardware_mix_line(inventory) - if hw_line: - lines.append(hw_line) - os_line = _os_mix_line(snapshot) - if os_line: - lines.append(os_line) - - if any( + wants_overview = _is_overview_query(q) or any(word in q for word in ("atlas", "cluster", "titan", "lab")) + wants_hardware = any(word in q for word in ("hardware", "architecture", "nodes", "node")) or wants_overview + wants_metrics = any( word in q for word in ( - "interesting", "status", "health", "overview", "summary", - "tell me", - "what do you know", - "about", "pods", "postgres", "connections", @@ -1558,20 +1674,32 @@ def cluster_overview_answer( "usage", "utilization", ) - ): + ) or wants_overview + + if wants_hardware: + hw_line = _hardware_mix_line(inventory) + if hw_line: + sentences.append(hw_line) + os_line = _os_mix_line(snapshot) + if os_line: + sentences.append(os_line) + + if wants_metrics: pods_line = _pods_summary_line(metrics) if pods_line: - lines.append(pods_line) - hottest_line = _hottest_summary_line(metrics) - if hottest_line: - lines.append(hottest_line) + sentences.append(pods_line) postgres_line = _postgres_summary_line(metrics) if postgres_line: - lines.append(postgres_line) + sentences.append(postgres_line) + hottest_line = _hottest_summary_line(metrics) + if hottest_line: + sentences.append(hottest_line) - if not lines: + if not sentences: return "" - return "Based on the snapshot, " + "\n".join(lines) + if len(sentences) > 3 and not wants_overview: + sentences = sentences[:3] + return "Based on the latest snapshot, " + " ".join(sentences) def cluster_answer( @@ -1582,6 +1710,12 @@ def cluster_answer( workloads: list[dict[str, Any]] | None, ) -> str: metrics_summary = snapshot_context(prompt, snapshot) + if _is_insight_query(prompt): + candidates = _insight_candidates(inventory, snapshot) + selected = _select_insight(prompt, candidates) + if selected: + text, confidence = selected + return _format_confidence(text, confidence) structured = structured_answer( prompt, inventory=inventory, @@ -1602,7 +1736,7 @@ def cluster_answer( overview = cluster_overview_answer(prompt, inventory=inventory, snapshot=snapshot) if overview: - kb_titles = kb_retrieve_titles(prompt, limit=4) if _knowledge_intent(prompt) else "" + kb_titles = kb_retrieve_titles(prompt, limit=4) if _doc_intent(prompt) else "" if kb_titles: overview = overview + "\n" + kb_titles return _format_confidence(overview, "medium")