def nodes_arch_summary(cluster_name: str, arch: str, *, fetch=None) -> str:
    """Return a one-sentence count of the cluster's nodes with a given CPU arch.

    The node list is read from ``/api/v1/nodes?limit=500`` and each node is
    matched on its ``kubernetes.io/arch`` label, falling back to the legacy
    ``beta.kubernetes.io/arch`` label when the stable one is absent.

    Args:
        cluster_name: Display name used verbatim at the start of the sentence.
        arch: Requested architecture. ``aarch64``/``arm64`` are treated as
            ``arm64`` and ``x86_64``/``x86-64``/``amd64`` as ``amd64``
            (case-insensitive, surrounding whitespace ignored); any other
            value is compared against the label as-is after lower-casing.
        fetch: Optional callable taking the API path and returning the decoded
            response. Defaults to the module's ``k8s_get``.

    Returns:
        A sentence such as ``"Atlas cluster has 3 arm64 nodes."``, or ``""``
        when the API call raises, the response is not a mapping, or it has no
        node items.
    """
    if fetch is None:
        fetch = k8s_get
    try:
        data = fetch("/api/v1/nodes?limit=500")
    except Exception:
        # Deliberate best-effort: any failure of the API call is reported to
        # the caller as "no answer" rather than propagated.
        return ""

    # NOTE(review): the request asks for at most 500 items and no continuation
    # is followed here, so larger clusters would presumably be under-counted.
    items = data.get("items") if isinstance(data, dict) else None
    if not isinstance(items, list) or not items:
        return ""

    aliases = {
        "aarch64": "arm64",
        "arm64": "arm64",
        "x86_64": "amd64",
        "x86-64": "amd64",
        "amd64": "amd64",
    }
    normalized = (arch or "").strip().lower()
    arch_label = aliases.get(normalized, normalized)

    total = 0
    for node in items:
        # Skip malformed entries instead of raising out of the summary.
        if not isinstance(node, dict):
            continue
        metadata = node.get("metadata")
        labels = metadata.get("labels") if isinstance(metadata, dict) else None
        if not isinstance(labels, dict):
            continue
        node_arch = labels.get("kubernetes.io/arch") or labels.get("beta.kubernetes.io/arch")
        if node_arch == arch_label:
            total += 1

    noun = "node" if total == 1 else "nodes"
    return f"{cluster_name} cluster has {total} {arch_label} {noun}."
+ def _strip_code_fence(text: str) -> str: cleaned = (text or "").strip() match = CODE_FENCE_RE.match(cleaned) @@ -622,7 +646,7 @@ def ollama_reply(hist_key, prompt: str, *, context: str) -> str: if API_KEY: headers["x-api-key"] = API_KEY r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers) - with request.urlopen(r, timeout=20) as resp: + with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp: data = json.loads(resp.read().decode()) raw_reply = data.get("message") or data.get("response") or data.get("reply") or data reply = _normalize_reply(raw_reply) or "I'm here to help." @@ -692,6 +716,19 @@ def sync_loop(token: str, room_id: str): continue send_msg(token, rid, summary) continue + if "node" in lower_body and any(word in lower_body for word in ("arm64", "aarch64", "amd64", "x86_64", "x86-64")): + if any(word in lower_body for word in ("cluster", "atlas", "titan")): + arch = "arm64" if "arm64" in lower_body or "aarch64" in lower_body else "amd64" + summary = nodes_arch_summary("Atlas", arch) + if not summary: + send_msg( + token, + rid, + "I couldn’t reach the cluster API to count nodes by architecture. Try again in a moment.", + ) + continue + send_msg(token, rid, summary) + continue if re.search(r"\bnode names?\b|\bnodes? named\b|\bnaming\b", lower_body): if any(word in lower_body for word in ("cluster", "atlas", "titan")): names_summary = nodes_names_summary("Atlas")