From bf2d4cff907d841cd0a04abecc60fe24087d7696 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Mon, 26 Jan 2026 19:29:26 -0300
Subject: [PATCH] atlasbot: answer from live inventory

---
Review notes (ignored by git-am; not part of the commit message):
- structured_answer: the "non-raspberry" rule now runs before the "raspberry"
  rule. It was unreachable, because every phrase it looks for contains
  "raspberry" and the broader rule returned first.
- structured_answer: "is" is now matched as a whole word instead of a
  substring, so prompts containing "list", "disk" or "this" no longer
  trigger the Yes/No cluster-membership reply.
- Line breaks and indentation were restored from a whitespace-collapsed copy.
  Hunk counts were re-derived and are consistent, but the indentation of the
  context lines is a best guess: verify against the tree, or apply with
  `git apply --ignore-whitespace`. The post-image blob id on the "index"
  line predates these revisions.

 services/comms/scripts/atlasbot/bot.py | 129 +++++++++++++++++++++++--
 1 file changed, 122 insertions(+), 7 deletions(-)

diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index e070ead..6fc654b 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -334,11 +334,12 @@ def _group_nodes(inventory: list[dict[str, Any]]) -> dict[str, list[str]]:
         grouped[node.get("hardware") or "unknown"].append(node["name"])
     return {k: sorted(v) for k, v in grouped.items()}
 
-def node_inventory_context(query: str) -> str:
+def node_inventory_context(query: str, inventory: list[dict[str, Any]] | None = None) -> str:
     q = (query or "").lower()
     if not any(word in q for word in ("node", "nodes", "raspberry", "rpi", "jetson", "amd64", "hardware", "cluster")):
         return ""
-    inventory = node_inventory_live()
+    if inventory is None:
+        inventory = node_inventory_live()
     if not inventory:
         return ""
     groups = _group_nodes(inventory)
@@ -370,6 +371,107 @@ def node_inventory_context(query: str) -> str:
             lines.append(f"- workers_missing (derived): {', '.join(missing)}")
     return "\n".join(lines)
 
+def node_inventory_for_prompt(prompt: str) -> list[dict[str, Any]]:
+    """Return the live node inventory when the prompt mentions nodes/hardware, else []."""
+    q = (prompt or "").lower()
+    if any(word in q for word in ("node", "nodes", "raspberry", "rpi", "jetson", "amd64", "hardware", "cluster", "worker")):
+        return node_inventory_live()
+    return []
+
+def _inventory_sets(inventory: list[dict[str, Any]]) -> dict[str, Any]:
+    """Split the inventory into sorted name lists (all, ready, not ready) plus hardware groups."""
+    names = [node["name"] for node in inventory]
+    ready = [node["name"] for node in inventory if node.get("ready") is True]
+    not_ready = [node["name"] for node in inventory if node.get("ready") is False]
+    groups = _group_nodes(inventory)
+    return {
+        "names": sorted(names),
+        "ready": sorted(ready),
+        "not_ready": sorted(not_ready),
+        "groups": groups,
+    }
+
+def structured_answer(prompt: str, *, inventory: list[dict[str, Any]], metrics_summary: str) -> str:
+    """Return a canned answer built from metrics/inventory, or "" when no rule matches the prompt."""
+    q = (prompt or "").lower()
+    if metrics_summary and any(word in q for word in ("postgres", "connection", "connections", "db")):
+        return metrics_summary
+
+    if not inventory:
+        return ""
+
+    sets = _inventory_sets(inventory)
+    names = sets["names"]
+    ready = sets["ready"]
+    not_ready = sets["not_ready"]
+    groups = sets["groups"]
+    total = len(names)
+
+    # Match "is" as a whole word; a substring test also fires on "list", "disk", "this".
+    for node in _extract_titan_nodes(q):
+        if node and ("is" in q.split() or "part of" in q or "in atlas" in q or "in cluster" in q):
+            if node in names:
+                return f"Yes. {node} is in the Atlas cluster."
+            return f"No. {node} is not in the Atlas cluster."
+
+    if any(word in q for word in ("how many", "count", "number")) and "node" in q and "worker" not in q:
+        return f"Atlas has {total} nodes; {len(ready)} ready, {len(not_ready)} not ready."
+
+    if "node names" in q or ("nodes" in q and "named" in q) or "naming" in q:
+        return "Atlas node names: " + ", ".join(names) + "."
+
+    if "ready" in q and "node" in q and "worker" in q:
+        if "not ready" in q or "unready" in q or "down" in q:
+            return "Worker nodes not ready: " + (", ".join(not_ready) if not_ready else "none") + "."
+        return "Ready worker nodes ({}): {}.".format(len(ready), ", ".join(ready))
+
+    if "worker" in q and any(word in q for word in ("missing", "expected", "should")):
+        expected_workers = expected_worker_nodes_from_metrics()
+        missing = sorted(set(expected_workers) - set(ready + not_ready)) if expected_workers else []
+        if "missing" in q and missing:
+            return "Missing worker nodes: " + ", ".join(missing) + "."
+        if expected_workers:
+            msg = f"Grafana inventory expects {len(expected_workers)} workers."
+            if missing:
+                msg += f" Missing: {', '.join(missing)}."
+            return msg
+        return "No expected worker inventory found; using live cluster state."
+
+    if "worker" in q and "node" in q and "ready" not in q and "missing" not in q:
+        return f"Worker nodes: {len(ready)} ready, {len(not_ready)} not ready."
+
+    if "jetson" in q:
+        jets = groups.get("jetson", [])
+        return f"Jetson nodes: {', '.join(jets)}." if jets else "No Jetson nodes found."
+
+    if "amd64" in q or "x86" in q:
+        amd = groups.get("amd64", [])
+        return f"amd64 nodes: {', '.join(amd)}." if amd else "No amd64 nodes found."
+
+    if "rpi4" in q:
+        rpi4 = groups.get("rpi4", [])
+        return f"rpi4 nodes: {', '.join(rpi4)}." if rpi4 else "No rpi4 nodes found."
+
+    if "rpi5" in q:
+        rpi5 = groups.get("rpi5", [])
+        return f"rpi5 nodes: {', '.join(rpi5)}." if rpi5 else "No rpi5 nodes found."
+
+    # Must precede the "raspberry" rule below: each phrase here contains
+    # "raspberry", so the broader rule would otherwise always win.
+    if "non-raspberry" in q or "non raspberry" in q or "not raspberry" in q:
+        non_rpi = sorted(set(groups.get("jetson", [])) | set(groups.get("amd64", [])))
+        return f"Non‑Raspberry Pi nodes: {', '.join(non_rpi)}." if non_rpi else "No non‑Raspberry Pi nodes found."
+
+    if "raspberry" in q or "rpi" in q:
+        rpi = sorted(set(groups.get("rpi4", [])) | set(groups.get("rpi5", [])))
+        return f"Raspberry Pi nodes: {', '.join(rpi)}." if rpi else "No Raspberry Pi nodes found."
+
+    if "arm64-unknown" in q or "unknown" in q:
+        unknown = sorted(set(groups.get("arm64-unknown", [])) | set(groups.get("unknown", [])))
+        return f"Unknown hardware nodes: {', '.join(unknown)}." if unknown else "No unknown hardware labels."
+
+    return ""
+
 def _metric_tokens(entry: dict[str, Any]) -> str:
     parts: list[str] = []
     for key in ("panel_title", "dashboard", "description"):
@@ -900,7 +1002,13 @@ history = collections.defaultdict(list) # (room_id, sender|None) -> list[str] (
 def key_for(room_id: str, sender: str, is_dm: bool):
     return (room_id, None) if is_dm else (room_id, sender)
 
-def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, str]]) -> str:
+def build_context(
+    prompt: str,
+    *,
+    allow_tools: bool,
+    targets: list[tuple[str, str]],
+    inventory: list[dict[str, Any]] | None = None,
+) -> str:
     parts: list[str] = []
 
     kb = kb_retrieve(prompt)
@@ -911,9 +1019,9 @@ def build_context(prompt: str, *, allow_tools: bool, targets: list[tuple[str, st
     if endpoints:
         parts.append(endpoints)
 
-    inventory = node_inventory_context(prompt)
-    if inventory:
-        parts.append(inventory)
+    node_ctx = node_inventory_context(prompt, inventory)
+    if node_ctx:
+        parts.append(node_ctx)
 
     if allow_tools:
         # Scope pod summaries to relevant namespaces/workloads when possible.
@@ -1083,7 +1191,8 @@ def sync_loop(token: str, room_id: str):
                             if isinstance(w, dict) and w.get("name"):
                                 targets.append((ns, str(w["name"])))
 
-                context = build_context(body, allow_tools=allow_tools, targets=targets)
+                inventory = node_inventory_for_prompt(body)
+                context = build_context(body, allow_tools=allow_tools, targets=targets, inventory=inventory)
                 if allow_tools and promql:
                     res = vm_query(promql, timeout=20)
                     rendered = vm_render_result(res, limit=15) or "(no results)"
@@ -1096,6 +1205,12 @@ def sync_loop(token: str, room_id: str):
                 fallback = metrics_fallback or ""
                 if not fallback and context:
                     fallback = _context_fallback(context)
+
+                structured = structured_answer(body, inventory=inventory, metrics_summary=metrics_fallback or "")
+                if structured:
+                    send_msg(token, rid, structured)
+                    continue
+
                 reply = ollama_reply_with_thinking(
                     token,
                     rid,