comms: handle arch node counts and extend LLM timeout

This commit is contained in:
Brad Stein 2026-01-26 09:36:08 -03:00
parent 14d18048d5
commit 352d4991f4

View File

@ -16,6 +16,7 @@ ROOM_ALIAS = "#othrys:live.bstein.dev"
# Endpoint the LLM request is sent to (used to build the urllib Request in
# ollama_reply); overridable through the OLLAMA_URL environment variable.
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
# Model identifier; presumably placed in the request payload -- confirm in
# ollama_reply, the payload construction is not visible here.
MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
# Optional API key; when non-empty it is sent as the "x-api-key" header.
API_KEY = os.environ.get("CHAT_API_KEY", "")
# Timeout, in seconds, passed to urlopen for the LLM request (default 90).
# NOTE: float() raises ValueError at import time if the variable is set to a
# non-numeric value.
OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "90"))
# Presumably the knowledge-base directory; empty by default -- usage is not
# visible in this view, confirm against the loader.
KB_DIR = os.environ.get("KB_DIR", "")
# Presumably the VictoriaMetrics base URL used for metric queries (default is
# an in-cluster service address) -- confirm against the query helper.
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428")
@ -525,6 +526,29 @@ def nodes_names_summary(cluster_name: str) -> str:
shown = ", ".join(names[:30])
return f"{cluster_name} node names: {shown}, … (+{len(names) - 30} more)."
def nodes_arch_summary(cluster_name: str, arch: str) -> str:
    """Return a one-sentence count of cluster nodes with the given CPU architecture.

    Args:
        cluster_name: Display name used at the start of the sentence.
        arch: Architecture as the user wrote it. Common aliases
            ("aarch64", "x86_64", "x86-64") are folded onto the Kubernetes
            label values "arm64" / "amd64"; anything else is matched
            verbatim after stripping and lower-casing.

    Returns:
        A sentence such as "Atlas cluster has 3 arm64 nodes.", or an empty
        string when the node list could not be fetched or is empty.
    """
    try:
        # Only one request is made here; whether k8s_get follows the API's
        # `continue` token beyond 500 nodes is not visible from this function.
        data = k8s_get("/api/v1/nodes?limit=500")
    except Exception:
        # Deliberate best-effort: an empty string signals "no answer available".
        return ""

    # Guard against a non-dict payload so a malformed response yields the same
    # "no answer" result instead of an AttributeError.
    items = data.get("items") if isinstance(data, dict) else None
    if not isinstance(items, list) or not items:
        return ""

    aliases = {
        "aarch64": "arm64",
        "arm64": "arm64",
        "x86_64": "amd64",
        "x86-64": "amd64",
        "amd64": "amd64",
    }
    normalized = (arch or "").strip().lower()
    arch_label = aliases.get(normalized, normalized)

    total = 0
    for node in items:
        if not isinstance(node, dict):
            continue
        metadata = node.get("metadata")
        labels = metadata.get("labels") if isinstance(metadata, dict) else None
        if not isinstance(labels, dict):
            continue
        # Prefer the stable label; fall back to the deprecated beta label for
        # nodes that only carry the older one.
        node_arch = labels.get("kubernetes.io/arch") or labels.get("beta.kubernetes.io/arch")
        if node_arch == arch_label:
            total += 1

    noun = "node" if total == 1 else "nodes"
    return f"{cluster_name} cluster has {total} {arch_label} {noun}."
def _strip_code_fence(text: str) -> str:
cleaned = (text or "").strip()
match = CODE_FENCE_RE.match(cleaned)
@ -622,7 +646,7 @@ def ollama_reply(hist_key, prompt: str, *, context: str) -> str:
if API_KEY:
headers["x-api-key"] = API_KEY
r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers)
with request.urlopen(r, timeout=20) as resp:
with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
data = json.loads(resp.read().decode())
raw_reply = data.get("message") or data.get("response") or data.get("reply") or data
reply = _normalize_reply(raw_reply) or "I'm here to help."
@ -692,6 +716,19 @@ def sync_loop(token: str, room_id: str):
continue
send_msg(token, rid, summary)
continue
if "node" in lower_body and any(word in lower_body for word in ("arm64", "aarch64", "amd64", "x86_64", "x86-64")):
if any(word in lower_body for word in ("cluster", "atlas", "titan")):
arch = "arm64" if "arm64" in lower_body or "aarch64" in lower_body else "amd64"
summary = nodes_arch_summary("Atlas", arch)
if not summary:
send_msg(
token,
rid,
"I couldnt reach the cluster API to count nodes by architecture. Try again in a moment.",
)
continue
send_msg(token, rid, summary)
continue
if re.search(r"\bnode names?\b|\bnodes? named\b|\bnaming\b", lower_body):
if any(word in lower_body for word in ("cluster", "atlas", "titan")):
names_summary = nodes_names_summary("Atlas")