comms: handle arch node counts and extend LLM timeout
This commit is contained in:
parent
14d18048d5
commit
352d4991f4
@ -16,6 +16,7 @@ ROOM_ALIAS = "#othrys:live.bstein.dev"
|
||||
OLLAMA_URL = os.environ.get("OLLAMA_URL", "https://chat.ai.bstein.dev/")
|
||||
MODEL = os.environ.get("OLLAMA_MODEL", "qwen2.5-coder:7b-instruct-q4_0")
|
||||
API_KEY = os.environ.get("CHAT_API_KEY", "")
|
||||
OLLAMA_TIMEOUT_SEC = float(os.environ.get("OLLAMA_TIMEOUT_SEC", "90"))
|
||||
|
||||
KB_DIR = os.environ.get("KB_DIR", "")
|
||||
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428")
|
||||
@ -525,6 +526,29 @@ def nodes_names_summary(cluster_name: str) -> str:
|
||||
shown = ", ".join(names[:30])
|
||||
return f"{cluster_name} node names: {shown}, … (+{len(names) - 30} more)."
|
||||
|
||||
|
||||
def nodes_arch_summary(cluster_name: str, arch: str) -> str:
    """Return a one-sentence count of the cluster's nodes for one CPU architecture.

    The requested architecture is normalized first: ``aarch64``/``arm64`` map to
    ``arm64`` and ``x86_64``/``x86-64``/``amd64`` map to ``amd64``; any other
    value is used as-is (stripped and lower-cased). Nodes are then counted by
    comparing that value against each node's ``kubernetes.io/arch`` label.

    Args:
        cluster_name: Display name placed at the start of the sentence.
        arch: Architecture name as the caller spelled it; aliases are accepted.

    Returns:
        A sentence such as ``"Atlas cluster has 3 arm64 nodes."``, or an empty
        string when no summary can be produced (blank ``arch``, the node
        listing request failed, or the response held no usable node list).
    """
    normalized = (arch or "").strip().lower()
    if normalized in ("aarch64", "arm64"):
        arch_label = "arm64"
    elif normalized in ("x86_64", "x86-64", "amd64"):
        arch_label = "amd64"
    else:
        arch_label = normalized
    if not arch_label:
        # Nothing to match against; skip the API call rather than emit a
        # sentence with an empty architecture name.
        return ""

    # NOTE(review): only a single page of up to 500 nodes is requested; larger
    # clusters would be under-counted unless k8s_get follows the continue
    # token itself -- confirm against k8s_get.
    try:
        data = k8s_get("/api/v1/nodes?limit=500")
    except Exception:
        # Best-effort lookup: callers treat "" as "no summary available".
        return ""

    # Guard against a payload that is not the expected JSON object.
    items = data.get("items") if isinstance(data, dict) else None
    if not isinstance(items, list) or not items:
        return ""

    total = 0
    for node in items:
        # Skip malformed entries instead of failing the whole count.
        metadata = node.get("metadata") if isinstance(node, dict) else None
        labels = metadata.get("labels") if isinstance(metadata, dict) else None
        if isinstance(labels, dict) and labels.get("kubernetes.io/arch") == arch_label:
            total += 1

    noun = "node" if total == 1 else "nodes"
    return f"{cluster_name} cluster has {total} {arch_label} {noun}."
|
||||
|
||||
def _strip_code_fence(text: str) -> str:
|
||||
cleaned = (text or "").strip()
|
||||
match = CODE_FENCE_RE.match(cleaned)
|
||||
@ -622,7 +646,7 @@ def ollama_reply(hist_key, prompt: str, *, context: str) -> str:
|
||||
if API_KEY:
|
||||
headers["x-api-key"] = API_KEY
|
||||
r = request.Request(OLLAMA_URL, data=json.dumps(payload).encode(), headers=headers)
|
||||
with request.urlopen(r, timeout=20) as resp:
|
||||
with request.urlopen(r, timeout=OLLAMA_TIMEOUT_SEC) as resp:
|
||||
data = json.loads(resp.read().decode())
|
||||
raw_reply = data.get("message") or data.get("response") or data.get("reply") or data
|
||||
reply = _normalize_reply(raw_reply) or "I'm here to help."
|
||||
@ -692,6 +716,19 @@ def sync_loop(token: str, room_id: str):
|
||||
continue
|
||||
send_msg(token, rid, summary)
|
||||
continue
|
||||
if "node" in lower_body and any(word in lower_body for word in ("arm64", "aarch64", "amd64", "x86_64", "x86-64")):
|
||||
if any(word in lower_body for word in ("cluster", "atlas", "titan")):
|
||||
arch = "arm64" if "arm64" in lower_body or "aarch64" in lower_body else "amd64"
|
||||
summary = nodes_arch_summary("Atlas", arch)
|
||||
if not summary:
|
||||
send_msg(
|
||||
token,
|
||||
rid,
|
||||
"I couldn’t reach the cluster API to count nodes by architecture. Try again in a moment.",
|
||||
)
|
||||
continue
|
||||
send_msg(token, rid, summary)
|
||||
continue
|
||||
if re.search(r"\bnode names?\b|\bnodes? named\b|\bnaming\b", lower_body):
|
||||
if any(word in lower_body for word in ("cluster", "atlas", "titan")):
|
||||
names_summary = nodes_names_summary("Atlas")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user