diff --git a/services/comms/scripts/atlasbot/bot.py b/services/comms/scripts/atlasbot/bot.py
index 233b25e..987df7a 100644
--- a/services/comms/scripts/atlasbot/bot.py
+++ b/services/comms/scripts/atlasbot/bot.py
@@ -95,11 +95,29 @@ CODE_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL)
 TITAN_NODE_RE = re.compile(r"\btitan-[0-9a-z]{2}\b", re.IGNORECASE)
 TITAN_RANGE_RE = re.compile(r"\btitan-([0-9a-z]{2})/([0-9a-z]{2})\b", re.IGNORECASE)
 _DASH_CHARS = "\u2010\u2011\u2012\u2013\u2014\u2015\u2212\uFE63\uFF0D"
-HOTTEST_QUERIES = {
-    "cpu": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "ram": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "net": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo\"}[5m]) + rate(node_network_transmit_bytes_total{device!~\"lo\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "io": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
+
+OPERATION_HINTS = {
+    "count": ("how many", "count", "number", "total"),
+    "list": ("list", "which", "what are", "show", "names"),
+    "top": ("top", "hottest", "highest", "most", "largest", "max", "maximum"),
+    "status": ("ready", "not ready", "unready", "down", "missing", "status"),
+}
+
+METRIC_HINTS = {
+    "cpu": ("cpu",),
+    "ram": ("ram", "memory", "mem"),
+    "net": ("net", "network", "bandwidth", "throughput"),
+    "io": ("io", "disk", "storage"),
+    "connections": ("connections", "conn", "postgres", "database", "db"),
+}
+
+HARDWARE_HINTS = {
+    "amd64": ("amd64", "x86", "x86_64", "x86-64"),
+    "jetson": ("jetson",),
+    "rpi4": ("rpi4",),
+    "rpi5": ("rpi5",),
+    "rpi": ("rpi", "raspberry"),
+    "arm64": ("arm64", "aarch64"),
 }
 
 def normalize_query(text: str) -> str:
@@ -312,63 +330,127 @@ def _humanize_rate(value: str, *, unit: str) -> str:
         return f"{val / 1024:.2f} KB/s"
     return f"{val:.2f} B/s"
 
-def _hottest_query(metric: str, node_regex: str | None) -> str:
-    expr = HOTTEST_QUERIES[metric]
-    if node_regex:
-        needle = 'node_uname_info{nodename!=""}'
-        replacement = f'node_uname_info{{nodename!=\"\",nodename=~\"{node_regex}\"}}'
-        return expr.replace(needle, replacement)
-    return expr
+def _has_any(text: str, phrases: tuple[str, ...]) -> bool:
+    return any(p in text for p in phrases)
 
-def _vm_hottest(metric: str, node_regex: str | None) -> tuple[str, str] | None:
-    expr = _hottest_query(metric, node_regex)
-    res = vm_query(expr)
-    series = _vm_value_series(res)
-    if not series:
-        return None
-    first = series[0]
-    labels = first.get("metric") or {}
-    value = first.get("value") or []
-    val = value[1] if isinstance(value, list) and len(value) > 1 else ""
-    node = labels.get("node") or labels.get("__name__") or ""
-    if not node:
-        return None
-    return (str(node), str(val))
+def _detect_operation(q: str) -> str | None:
+    for op, phrases in OPERATION_HINTS.items():
+        if _has_any(q, phrases):
+            return op
+    return None
 
-def _hottest_answer(q: str, *, nodes: list[str] | None) -> str:
-    metric = None
-    assumed_cpu = False
-    if "cpu" in q:
-        metric = "cpu"
-    elif "ram" in q or "memory" in q:
-        metric = "ram"
-    elif "net" in q or "network" in q:
-        metric = "net"
-    elif "io" in q or "disk" in q or "storage" in q:
-        metric = "io"
-    if metric is None:
-        metric = "cpu"
-        assumed_cpu = True
-    if nodes is not None and not nodes:
-        return "No nodes match the requested hardware class."
+def _detect_metric(q: str) -> str | None:
+    for metric, phrases in METRIC_HINTS.items():
+        if _has_any(q, phrases):
+            return metric
+    return None
 
-    node_regex = "|".join(nodes) if nodes else None
-    metrics = [metric]
-    lines: list[str] = []
-    for m in metrics:
-        picked = _vm_hottest(m, node_regex)
-        if not picked:
+def _detect_hardware_filters(q: str) -> tuple[set[str], set[str]]:
+    include: set[str] = set()
+    exclude: set[str] = set()
+    for hardware, phrases in HARDWARE_HINTS.items():
+        for phrase in phrases:
+            if f"non {phrase}" in q or f"non-{phrase}" in q or f"not {phrase}" in q:
+                exclude.add(hardware)
+            elif phrase in q:
+                include.add(hardware)
+    return include, exclude
+
+def _detect_entity(q: str) -> str | None:
+    if "node" in q or "nodes" in q or "worker" in q or TITAN_NODE_RE.search(q):
+        return "node"
+    if "pod" in q or "pods" in q:
+        return "pod"
+    if "namespace" in q or "namespaces" in q:
+        return "namespace"
+    return None
+
+def _metric_entry_score(entry: dict[str, Any], tokens: list[str], *, metric: str | None, op: str | None) -> int:
+    hay = _metric_tokens(entry)
+    score = 0
+    for t in set(tokens):
+        if t in hay:
+            score += 2 if t in (entry.get("panel_title") or "").lower() else 1
+    if metric:
+        for phrase in METRIC_HINTS.get(metric, (metric,)):
+            if phrase in hay:
+                score += 3
+    if op == "top" and ("hottest" in hay or "top" in hay):
+        score += 3
+    if "node" in hay:
+        score += 1
+    return score
+
+def _select_metric_entry(tokens: list[str], *, metric: str | None, op: str | None) -> dict[str, Any] | None:
+    scored: list[tuple[int, dict[str, Any]]] = []
+    for entry in _METRIC_INDEX:
+        if not isinstance(entry, dict):
             continue
-        node, val = picked
-        unit = "%" if m in ("cpu", "ram") else "B/s"
-        val_str = _humanize_rate(val, unit=unit)
-        label = {"cpu": "CPU", "ram": "RAM", "net": "NET", "io": "I/O"}[m]
-        lines.append(f"{label}: {node} ({val_str})")
-    if not lines:
+        score = _metric_entry_score(entry, tokens, metric=metric, op=op)
+        if score:
+            scored.append((score, entry))
+    if not scored:
+        return None
+    scored.sort(key=lambda item: item[0], reverse=True)
+    return scored[0][1]
+
+def _apply_node_filter(expr: str, node_regex: str | None) -> str:
+    if not node_regex:
+        return expr
+    needle = 'node_uname_info{nodename!=""}'
+    replacement = f'node_uname_info{{nodename!=\"\",nodename=~\"{node_regex}\"}}'
+    return expr.replace(needle, replacement)
+
+def _format_metric_answer(entry: dict[str, Any], res: dict | None) -> str:
+    series = _vm_value_series(res)
+    panel = entry.get("panel_title") or "Metric"
+    if not series:
         return ""
-    label = metric.upper()
-    suffix = " (defaulting to CPU)" if assumed_cpu else ""
-    return f"Hottest node by {label}: {lines[0].split(': ', 1)[1]}.{suffix}"
+    rendered = vm_render_result(res, limit=5)
+    if not rendered:
+        return ""
+    lines = [line.lstrip("-").strip() for line in rendered.splitlines() if line.strip().startswith("-")]
+    if len(lines) == 1:
+        return f"{panel}: {lines[0]}."
+    return f"{panel}:\n" + "\n".join(f"- {line}" for line in lines)
+
+def _inventory_filter(
+    inventory: list[dict[str, Any]],
+    *,
+    include_hw: set[str],
+    exclude_hw: set[str],
+    only_workers: bool,
+    only_ready: bool | None,
+    nodes_in_query: list[str],
+) -> list[dict[str, Any]]:
+    results = inventory
+    if nodes_in_query:
+        results = [node for node in results if node.get("name") in nodes_in_query]
+    if only_workers:
+        results = [node for node in results if node.get("is_worker") is True]
+    if only_ready is True:
+        results = [node for node in results if node.get("ready") is True]
+    if only_ready is False:
+        results = [node for node in results if node.get("ready") is False]
+    if include_hw:
+        results = [node for node in results if _hardware_match(node, include_hw)]
+    if exclude_hw:
+        results = [node for node in results if not _hardware_match(node, exclude_hw)]
+    return results
+
+def _hardware_match(node: dict[str, Any], filters: set[str]) -> bool:
+    hw = node.get("hardware") or ""
+    arch = node.get("arch") or ""
+    for f in filters:
+        if f == "rpi" and hw in ("rpi4", "rpi5"):
+            return True
+        if f == "arm64" and arch == "arm64":
+            return True
+        if hw == f:
+            return True
+        if f == "amd64" and arch == "amd64":
+            return True
+    return False
 
 def _node_roles(labels: dict[str, Any]) -> list[str]:
     roles: list[str] = []
@@ -495,176 +577,103 @@ def _inventory_sets(inventory: list[dict[str, Any]]) -> dict[str, Any]:
 
 def structured_answer(prompt: str, *, inventory: list[dict[str, Any]], metrics_summary: str) -> str:
     q = normalize_query(prompt)
-    if metrics_summary and any(word in q for word in ("postgres", "connection", "connections", "db")):
-        return metrics_summary
-
-    if not inventory:
+    if not q:
         return ""
 
-    sets = _inventory_sets(inventory)
-    names = sets["names"]
-    ready = sets["ready"]
-    not_ready = sets["not_ready"]
-    groups = sets["groups"]
-    worker_names = sets["worker_names"]
-    worker_ready = sets["worker_ready"]
-    worker_not_ready = sets["worker_not_ready"]
-    expected_workers = sets["expected_workers"]
-    expected_ready = sets["expected_ready"]
-    expected_not_ready = sets["expected_not_ready"]
-    expected_missing = sets["expected_missing"]
-    total = len(names)
+    tokens = _tokens(q)
+    op = _detect_operation(q)
+    metric = _detect_metric(q)
+    entity = _detect_entity(q)
+    include_hw, exclude_hw = _detect_hardware_filters(q)
     nodes_in_query = _extract_titan_nodes(q)
-    rpi_nodes = set(groups.get("rpi4", [])) | set(groups.get("rpi5", []))
-    non_rpi = set(groups.get("jetson", [])) | set(groups.get("amd64", []))
-    unknown_hw = set(groups.get("arm64-unknown", [])) | set(groups.get("unknown", []))
+    only_workers = "worker" in q or "workers" in q
+    only_ready: bool | None = None
+    if "not ready" in q or "unready" in q or "down" in q or "missing" in q:
+        only_ready = False
+    elif "ready" in q:
+        only_ready = True
 
-    if "hottest" in q or "hot" in q:
-        filter_nodes: list[str] | None = None
-        if "amd64" in q or "x86" in q:
-            filter_nodes = sorted(groups.get("amd64", []))
-        elif "jetson" in q:
-            filter_nodes = sorted(groups.get("jetson", []))
-        elif "raspberry" in q or "rpi" in q:
-            filter_nodes = sorted(rpi_nodes)
-        elif "arm64" in q:
-            filter_nodes = sorted([n for n in names if n not in groups.get("amd64", [])])
-        hottest = _hottest_answer(q, nodes=filter_nodes)
-        if hottest:
-            return hottest
-        return "Unable to determine hottest nodes right now (metrics unavailable)."
+    if entity == "node" and only_ready is not None and op != "count":
+        op = "status"
 
-    if nodes_in_query and ("raspberry" in q or "rpi" in q):
-        parts: list[str] = []
-        for node in nodes_in_query:
-            if node in rpi_nodes:
-                parts.append(f"{node} is a Raspberry Pi node.")
-            elif node in non_rpi:
-                parts.append(f"{node} is not a Raspberry Pi node.")
-            elif node in names:
-                parts.append(f"{node} is in Atlas but hardware is unknown.")
-            else:
-                parts.append(f"{node} is not in the Atlas cluster.")
-        return " ".join(parts)
+    if not op and entity == "node":
+        op = "list" if (include_hw or exclude_hw or nodes_in_query) else "count"
 
-    if nodes_in_query and "jetson" in q:
-        jets = set(groups.get("jetson", []))
-        parts = []
-        for node in nodes_in_query:
-            if node in jets:
-                parts.append(f"{node} is a Jetson node.")
-            elif node in names:
-                parts.append(f"{node} is not a Jetson node.")
-            else:
-                parts.append(f"{node} is not in the Atlas cluster.")
-        return " ".join(parts)
+    if op == "top" and metric is None:
+        metric = "cpu"
 
-    if nodes_in_query and ("is" in q or "part of" in q or "in atlas" in q or "in cluster" in q or "present" in q or "exist" in q):
-        parts: list[str] = []
-        for node in nodes_in_query:
-            if node in names:
-                parts.append(f"Yes. {node} is in the Atlas cluster.")
-            else:
-                parts.append(f"No. {node} is not in the Atlas cluster.")
-        return " ".join(parts)
-
-    if any(term in q for term in ("non-raspberry", "non raspberry", "not raspberry", "non-rpi", "non rpi")):
-        non_rpi_sorted = sorted(non_rpi)
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(non_rpi_sorted)} non‑Raspberry Pi nodes."
-        if any(phrase in q for phrase in ("besides jetson", "excluding jetson", "without jetson", "non jetson")):
-            amd = sorted(groups.get("amd64", []))
-            return f"Non‑Raspberry Pi nodes (excluding Jetson): {', '.join(amd)}." if amd else "No non‑Raspberry Pi nodes outside Jetson."
-        return f"Non‑Raspberry Pi nodes: {', '.join(non_rpi_sorted)}." if non_rpi_sorted else "No non‑Raspberry Pi nodes found."
-
-    if "jetson" in q:
-        jets = groups.get("jetson", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(jets)} Jetson nodes."
-        return f"Jetson nodes: {', '.join(jets)}." if jets else "No Jetson nodes found."
-
-    if "amd64" in q or "x86" in q:
-        amd = groups.get("amd64", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(amd)} amd64 nodes."
-        return f"amd64 nodes: {', '.join(amd)}." if amd else "No amd64 nodes found."
-
-    if "arm64" in q and "node" in q and any(word in q for word in ("how many", "count", "number")):
-        count = sum(1 for node in inventory if node.get("arch") == "arm64")
-        return f"Atlas has {count} arm64 nodes."
-
-    if "rpi4" in q:
-        rpi4 = groups.get("rpi4", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi4)} rpi4 nodes."
-        return f"rpi4 nodes: {', '.join(rpi4)}." if rpi4 else "No rpi4 nodes found."
-
-    if "rpi5" in q:
-        rpi5 = groups.get("rpi5", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi5)} rpi5 nodes."
-        return f"rpi5 nodes: {', '.join(rpi5)}." if rpi5 else "No rpi5 nodes found."
-
-    if "raspberry" in q or "rpi" in q:
-        rpi = sorted(rpi_nodes)
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi)} Raspberry Pi nodes."
-        return f"Raspberry Pi nodes: {', '.join(rpi)}." if rpi else "No Raspberry Pi nodes found."
-
-    if "arm64-unknown" in q or "unknown" in q or "no hardware" in q:
-        unknown = sorted(unknown_hw)
-        return f"Unknown hardware nodes: {', '.join(unknown)}." if unknown else "No unknown hardware labels."
-
-    if ("notready" in q or "not ready" in q or "unready" in q) and ("node" in q or "nodes" in q):
-        return "Not ready nodes: " + (", ".join(not_ready) if not_ready else "none") + "."
-
-    if "worker" in q and ("node" in q or "nodes" in q or "workers" in q):
-        not_ready_query = "not ready" in q or "unready" in q or "down" in q or ("not" in q and "ready" in q)
-        if expected_workers:
-            if "missing" in q:
-                return "Missing worker nodes: " + (", ".join(expected_missing) if expected_missing else "none") + "."
-            if "ready" in q and ("not ready" in q or "vs" in q or "versus" in q):
-                return (
-                    f"Expected workers: {len(expected_ready)} ready, "
-                    f"{len(expected_not_ready)} not ready (expected {len(expected_workers)})."
+    # Metrics-first when a metric or top operation is requested.
+    if metric or op == "top":
+        entry = _select_metric_entry(tokens, metric=metric, op=op)
+        if entry and isinstance(entry.get("exprs"), list) and entry["exprs"]:
+            expr = entry["exprs"][0]
+            if inventory:
+                scoped = _inventory_filter(
+                    inventory,
+                    include_hw=include_hw,
+                    exclude_hw=exclude_hw,
+                    only_workers=only_workers,
+                    only_ready=None,
+                    nodes_in_query=nodes_in_query,
                 )
-            if any(word in q for word in ("how many", "count", "number")) and ("expect" in q or "expected" in q or "should" in q):
-                msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
-                if expected_missing:
-                    msg += f" Missing: {', '.join(expected_missing)}."
-                return msg
-            if not_ready_query:
-                if expected_not_ready or expected_missing:
-                    detail = []
-                    if expected_not_ready:
-                        detail.append(f"Not ready: {', '.join(expected_not_ready)}")
-                    if expected_missing:
-                        detail.append(f"Missing: {', '.join(expected_missing)}")
-                    return "Worker nodes needing attention. " + " ".join(detail) + "."
-                return "All expected worker nodes are Ready."
-            if any(word in q for word in ("expected", "expect", "should")):
-                msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
-                if expected_missing:
-                    msg += f" Missing: {', '.join(expected_missing)}."
-                return msg
-            if any(word in q for word in ("how many", "count", "number")):
-                return f"Worker nodes: {len(expected_ready)} ready, {len(expected_not_ready)} not ready (expected {len(expected_workers)})."
-            if "ready" in q:
-                return f"Ready worker nodes ({len(expected_ready)}): {', '.join(expected_ready)}."
-        if not_ready_query:
-            return "Worker nodes not ready: " + (", ".join(worker_not_ready) if worker_not_ready else "none") + "."
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Worker nodes: {len(worker_ready)} ready, {len(worker_not_ready)} not ready."
-        return "Ready worker nodes ({}): {}.".format(len(worker_ready), ", ".join(worker_ready))
+                if scoped:
+                    node_regex = "|".join([n["name"] for n in scoped])
+                    expr = _apply_node_filter(expr, node_regex)
+            res = vm_query(expr, timeout=20)
+            answer = _format_metric_answer(entry, res)
+            if answer:
+                return answer
+        if metrics_summary:
+            return metrics_summary
 
-    if any(word in q for word in ("how many", "count", "number")) and "node" in q:
-        return f"Atlas has {total} nodes; {len(ready)} ready, {len(not_ready)} not ready."
+    if entity != "node" or not inventory:
+        if any(word in q for word in METRIC_HINT_WORDS) and not metrics_summary:
+            return "I don't have data to answer that right now."
+        return ""
 
-    if "node names" in q or ("nodes" in q and "named" in q) or "naming" in q:
-        return "Atlas node names: " + ", ".join(names) + "."
+    expected_workers = expected_worker_nodes_from_metrics()
+    filtered = _inventory_filter(
+        inventory,
+        include_hw=include_hw,
+        exclude_hw=exclude_hw,
+        only_workers=only_workers,
+        only_ready=only_ready if op in ("status", "count") else None,
+        nodes_in_query=nodes_in_query,
+    )
+    names = [node["name"] for node in filtered]
 
-    if "ready" in q and "node" in q:
-        return f"Ready nodes ({len(ready)}): {', '.join(ready)}."
+    if op == "status":
+        if "missing" in q and expected_workers:
+            missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
+            return "Missing nodes: " + (", ".join(missing) if missing else "none") + "."
+        if only_ready is False:
+            return "Not ready nodes: " + (", ".join(names) if names else "none") + "."
+        if only_ready is True:
+            return f"Ready nodes ({len(names)}): " + (", ".join(names) if names else "none") + "."
+
+    if op == "count":
+        if expected_workers and ("expected" in q or "should" in q):
+            missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
+            msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
+            if missing:
+                msg += f" Missing: {', '.join(missing)}."
+            return msg
+        if not (include_hw or exclude_hw or nodes_in_query or only_workers):
+            return f"Atlas has {len(names)} nodes."
+        return f"Matching nodes: {len(names)}."
+
+    if op == "list":
+        if nodes_in_query:
+            parts = []
+            existing = {n["name"] for n in inventory}
+            for node in nodes_in_query:
+                parts.append(f"{node}: {'present' if node in existing else 'not present'}")
+            return "Node presence: " + ", ".join(parts) + "."
+        if not names:
+            return "Matching nodes: none."
+        shown = names[:30]
+        suffix = f", … (+{len(names) - 30} more)" if len(names) > 30 else ""
+        return "Matching nodes: " + ", ".join(shown) + suffix + "."
 
     return ""
 
@@ -727,25 +736,6 @@ def metrics_query_context(prompt: str, *, allow_tools: bool) -> tuple[str, str]:
     fallback = _metrics_fallback_summary(panel, summary)
     return context, fallback
 
-def jetson_nodes_from_kb() -> list[str]:
-    for doc in KB.get("runbooks", []):
-        if not isinstance(doc, dict):
-            continue
-        body = str(doc.get("body") or "")
-        for line in body.splitlines():
-            if "jetson" not in line.lower():
-                continue
-            names = _extract_titan_nodes(line)
-            if names:
-                return names
-    return []
-
-def jetson_nodes_summary(cluster_name: str) -> str:
-    names = jetson_nodes_from_kb()
-    if names:
-        return f"{cluster_name} has {len(names)} Jetson nodes: {', '.join(names)}."
-    return ""
-
 def catalog_hints(query: str) -> tuple[str, list[tuple[str, str]]]:
     q = (query or "").strip()
     if not q or not KB.get("catalog"):
@@ -953,22 +943,16 @@ def _parse_metric_lines(summary: str) -> dict[str, str]:
 def _metrics_fallback_summary(panel: str, summary: str) -> str:
     parsed = _parse_metric_lines(summary)
     panel_l = (panel or "").lower()
-    if panel_l.startswith("postgres connections"):
-        used = parsed.get("conn=used")
-        maxv = parsed.get("conn=max")
-        if used and maxv:
-            try:
-                used_i = int(float(used))
-                max_i = int(float(maxv))
-            except ValueError:
-                return f"Postgres connections: {summary}"
-            free = max_i - used_i
-            return f"Postgres connections: {used_i}/{max_i} used ({free} free)."
-    if panel_l.startswith("postgres hottest"):
-        if parsed:
-            label, value = next(iter(parsed.items()))
-            return f"Most Postgres connections: {label} = {value}."
-    return f"{panel}: {summary}"
+    if parsed:
+        items = list(parsed.items())
+        if len(items) == 1:
+            label, value = items[0]
+            return f"{panel}: {label} = {value}."
+        compact = "; ".join(f"{k}={v}" for k, v in items)
+        return f"{panel}: {compact}."
+    if panel_l:
+        return f"{panel}: {summary}"
+    return summary
 
 def _node_ready_status(node: dict) -> bool | None:
     conditions = node.get("status", {}).get("conditions") or []
@@ -1075,93 +1059,6 @@ def vm_cluster_snapshot() -> str:
         parts.append(pr)
     return "\n".join(parts).strip()
 
-def nodes_summary(cluster_name: str) -> str:
-    state = _ariadne_state()
-    if state:
-        nodes = state.get("nodes") if isinstance(state.get("nodes"), dict) else {}
-        total = nodes.get("total")
-        ready = nodes.get("ready")
-        not_ready = nodes.get("not_ready")
-        if isinstance(total, int) and isinstance(ready, int):
-            not_ready = not_ready if isinstance(not_ready, int) else max(total - ready, 0)
-            if not_ready:
-                return f"{cluster_name} cluster has {total} nodes: {ready} Ready, {not_ready} NotReady."
-            return f"{cluster_name} cluster has {total} nodes, all Ready."
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    total = len(items)
-    ready = 0
-    for node in items:
-        conditions = node.get("status", {}).get("conditions") or []
-        for cond in conditions if isinstance(conditions, list) else []:
-            if cond.get("type") == "Ready":
-                if cond.get("status") == "True":
-                    ready += 1
-                break
-    not_ready = max(total - ready, 0)
-    if not_ready:
-        return f"{cluster_name} cluster has {total} nodes: {ready} Ready, {not_ready} NotReady."
-    return f"{cluster_name} cluster has {total} nodes, all Ready."
-
-def nodes_names_summary(cluster_name: str) -> str:
-    state = _ariadne_state()
-    if state:
-        nodes = state.get("nodes") if isinstance(state.get("nodes"), dict) else {}
-        names = nodes.get("names")
-        if isinstance(names, list) and names:
-            cleaned = sorted({str(n) for n in names if n})
-            if len(cleaned) <= 30:
-                return f"{cluster_name} node names: {', '.join(cleaned)}."
-            shown = ", ".join(cleaned[:30])
-            return f"{cluster_name} node names: {shown}, … (+{len(cleaned) - 30} more)."
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    names = []
-    for node in items:
-        name = (node.get("metadata") or {}).get("name") or ""
-        if name:
-            names.append(name)
-    names = sorted(set(names))
-    if not names:
-        return ""
-    if len(names) <= 30:
-        return f"{cluster_name} node names: {', '.join(names)}."
-    shown = ", ".join(names[:30])
-    return f"{cluster_name} node names: {shown}, … (+{len(names) - 30} more)."
-
-
-def nodes_arch_summary(cluster_name: str, arch: str) -> str:
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    normalized = (arch or "").strip().lower()
-    if normalized in ("aarch64", "arm64"):
-        arch_label = "arm64"
-    elif normalized in ("x86_64", "x86-64", "amd64"):
-        arch_label = "amd64"
-    else:
-        arch_label = normalized
-    total = 0
-    for node in items:
-        labels = (node.get("metadata") or {}).get("labels") or {}
-        if labels.get("kubernetes.io/arch") == arch_label:
-            total += 1
-    return f"{cluster_name} cluster has {total} {arch_label} nodes."
-
 def _strip_code_fence(text: str) -> str:
     cleaned = (text or "").strip()
     match = CODE_FENCE_RE.match(cleaned)