atlasbot: replace targeted handlers with generic planner

Brad Stein 2026-01-26 22:38:18 -03:00
parent 6c413d4a50
commit 37a203509b
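In rough terms, the new planner classifies a normalized question along independent axes (operation, metric, entity, hardware filters) using the hint tables added below, then routes to either a VictoriaMetrics query or an inventory filter. A minimal, self-contained sketch of the classification step, using abbreviated copies of the hint tables (not the full module):

# Sketch only: abbreviated hint tables and the substring matching used by the
# _has_any/_detect_* helpers introduced in this commit.
OPERATION_HINTS = {
    "count": ("how many", "count", "number", "total"),
    "top": ("top", "hottest", "highest", "most"),
}
METRIC_HINTS = {
    "cpu": ("cpu",),
    "ram": ("ram", "memory", "mem"),
}

def _has_any(text: str, phrases: tuple[str, ...]) -> bool:
    return any(p in text for p in phrases)

def detect(q: str) -> tuple[str | None, str | None]:
    # First hint table entry whose phrase list matches the question wins.
    op = next((o for o, ps in OPERATION_HINTS.items() if _has_any(q, ps)), None)
    metric = next((m for m, ps in METRIC_HINTS.items() if _has_any(q, ps)), None)
    return op, metric

print(detect("which node has the hottest cpu"))    # ('top', 'cpu')
print(detect("how many nodes are low on memory"))  # ('count', 'ram')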


@@ -95,11 +95,29 @@ CODE_FENCE_RE = re.compile(r"^```(?:json)?\s*(.*?)\s*```$", re.DOTALL)
TITAN_NODE_RE = re.compile(r"\btitan-[0-9a-z]{2}\b", re.IGNORECASE)
TITAN_RANGE_RE = re.compile(r"\btitan-([0-9a-z]{2})/([0-9a-z]{2})\b", re.IGNORECASE)
_DASH_CHARS = "\u2010\u2011\u2012\u2013\u2014\u2015\u2212\uFE63\uFF0D"
-HOTTEST_QUERIES = {
-    "cpu": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "ram": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "net": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo\"}[5m]) + rate(node_network_transmit_bytes_total{device!~\"lo\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-    "io": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
-}
+OPERATION_HINTS = {
+    "count": ("how many", "count", "number", "total"),
+    "list": ("list", "which", "what are", "show", "names"),
+    "top": ("top", "hottest", "highest", "most", "largest", "max", "maximum"),
+    "status": ("ready", "not ready", "unready", "down", "missing", "status"),
+}
+METRIC_HINTS = {
+    "cpu": ("cpu",),
+    "ram": ("ram", "memory", "mem"),
+    "net": ("net", "network", "bandwidth", "throughput"),
+    "io": ("io", "disk", "storage"),
+    "connections": ("connections", "conn", "postgres", "database", "db"),
+}
+HARDWARE_HINTS = {
+    "amd64": ("amd64", "x86", "x86_64", "x86-64"),
+    "jetson": ("jetson",),
+    "rpi4": ("rpi4",),
+    "rpi5": ("rpi5",),
+    "rpi": ("rpi", "raspberry"),
+    "arm64": ("arm64", "aarch64"),
+}
def normalize_query(text: str) -> str:
@@ -312,63 +330,127 @@ def _humanize_rate(value: str, *, unit: str) -> str:
return f"{val / 1024:.2f} KB/s" return f"{val / 1024:.2f} KB/s"
return f"{val:.2f} B/s" return f"{val:.2f} B/s"
-def _hottest_query(metric: str, node_regex: str | None) -> str:
-    expr = HOTTEST_QUERIES[metric]
-    if node_regex:
-        needle = 'node_uname_info{nodename!=""}'
-        replacement = f'node_uname_info{{nodename!=\"\",nodename=~\"{node_regex}\"}}'
-        return expr.replace(needle, replacement)
-    return expr
-
-def _vm_hottest(metric: str, node_regex: str | None) -> tuple[str, str] | None:
-    expr = _hottest_query(metric, node_regex)
-    res = vm_query(expr)
-    series = _vm_value_series(res)
-    if not series:
-        return None
-    first = series[0]
-    labels = first.get("metric") or {}
-    value = first.get("value") or []
-    val = value[1] if isinstance(value, list) and len(value) > 1 else ""
-    node = labels.get("node") or labels.get("__name__") or ""
-    if not node:
-        return None
-    return (str(node), str(val))
-
-def _hottest_answer(q: str, *, nodes: list[str] | None) -> str:
-    metric = None
-    assumed_cpu = False
-    if "cpu" in q:
-        metric = "cpu"
-    elif "ram" in q or "memory" in q:
-        metric = "ram"
-    elif "net" in q or "network" in q:
-        metric = "net"
-    elif "io" in q or "disk" in q or "storage" in q:
-        metric = "io"
-    if metric is None:
-        metric = "cpu"
-        assumed_cpu = True
-    if nodes is not None and not nodes:
-        return "No nodes match the requested hardware class."
-    node_regex = "|".join(nodes) if nodes else None
-    metrics = [metric]
-    lines: list[str] = []
-    for m in metrics:
-        picked = _vm_hottest(m, node_regex)
-        if not picked:
-            continue
-        node, val = picked
-        unit = "%" if m in ("cpu", "ram") else "B/s"
-        val_str = _humanize_rate(val, unit=unit)
-        label = {"cpu": "CPU", "ram": "RAM", "net": "NET", "io": "I/O"}[m]
-        lines.append(f"{label}: {node} ({val_str})")
-    if not lines:
-        return ""
-    label = metric.upper()
-    suffix = " (defaulting to CPU)" if assumed_cpu else ""
-    return f"Hottest node by {label}: {lines[0].split(': ', 1)[1]}.{suffix}"
+def _has_any(text: str, phrases: tuple[str, ...]) -> bool:
+    return any(p in text for p in phrases)
+
+def _detect_operation(q: str) -> str | None:
+    for op, phrases in OPERATION_HINTS.items():
+        if _has_any(q, phrases):
+            return op
+    return None
+
+def _detect_metric(q: str) -> str | None:
+    for metric, phrases in METRIC_HINTS.items():
+        if _has_any(q, phrases):
+            return metric
+    return None
+
+def _detect_hardware_filters(q: str) -> tuple[set[str], set[str]]:
+    include: set[str] = set()
+    exclude: set[str] = set()
+    for hardware, phrases in HARDWARE_HINTS.items():
+        for phrase in phrases:
+            if f"non {phrase}" in q or f"non-{phrase}" in q or f"not {phrase}" in q:
+                exclude.add(hardware)
+            elif phrase in q:
+                include.add(hardware)
+    return include, exclude
+
+def _detect_entity(q: str) -> str | None:
+    if "node" in q or "nodes" in q or "worker" in q or TITAN_NODE_RE.search(q):
+        return "node"
+    if "pod" in q or "pods" in q:
+        return "pod"
+    if "namespace" in q or "namespaces" in q:
+        return "namespace"
+    return None
+
+def _metric_entry_score(entry: dict[str, Any], tokens: list[str], *, metric: str | None, op: str | None) -> int:
+    hay = _metric_tokens(entry)
+    score = 0
+    for t in set(tokens):
+        if t in hay:
+            score += 2 if t in (entry.get("panel_title") or "").lower() else 1
+    if metric:
+        for phrase in METRIC_HINTS.get(metric, (metric,)):
+            if phrase in hay:
+                score += 3
+    if op == "top" and ("hottest" in hay or "top" in hay):
+        score += 3
+    if "node" in hay:
+        score += 1
+    return score
+
+def _select_metric_entry(tokens: list[str], *, metric: str | None, op: str | None) -> dict[str, Any] | None:
+    scored: list[tuple[int, dict[str, Any]]] = []
+    for entry in _METRIC_INDEX:
+        if not isinstance(entry, dict):
+            continue
+        score = _metric_entry_score(entry, tokens, metric=metric, op=op)
+        if score:
+            scored.append((score, entry))
+    if not scored:
+        return None
+    scored.sort(key=lambda item: item[0], reverse=True)
+    return scored[0][1]
+
+def _apply_node_filter(expr: str, node_regex: str | None) -> str:
+    if not node_regex:
+        return expr
+    needle = 'node_uname_info{nodename!=""}'
+    replacement = f'node_uname_info{{nodename!=\"\",nodename=~\"{node_regex}\"}}'
+    return expr.replace(needle, replacement)
+
+def _format_metric_answer(entry: dict[str, Any], res: dict | None) -> str:
+    series = _vm_value_series(res)
+    panel = entry.get("panel_title") or "Metric"
+    if not series:
+        return ""
+    rendered = vm_render_result(res, limit=5)
+    if not rendered:
+        return ""
+    lines = [line.lstrip("-").strip() for line in rendered.splitlines() if line.strip().startswith("-")]
+    if len(lines) == 1:
+        return f"{panel}: {lines[0]}."
+    return f"{panel}:\n" + "\n".join(f"- {line}" for line in lines)
+
+def _inventory_filter(
+    inventory: list[dict[str, Any]],
+    *,
+    include_hw: set[str],
+    exclude_hw: set[str],
+    only_workers: bool,
+    only_ready: bool | None,
+    nodes_in_query: list[str],
+) -> list[dict[str, Any]]:
+    results = inventory
+    if nodes_in_query:
+        results = [node for node in results if node.get("name") in nodes_in_query]
+    if only_workers:
+        results = [node for node in results if node.get("is_worker") is True]
+    if only_ready is True:
+        results = [node for node in results if node.get("ready") is True]
+    if only_ready is False:
+        results = [node for node in results if node.get("ready") is False]
+    if include_hw:
+        results = [node for node in results if _hardware_match(node, include_hw)]
+    if exclude_hw:
+        results = [node for node in results if not _hardware_match(node, exclude_hw)]
+    return results
+
+def _hardware_match(node: dict[str, Any], filters: set[str]) -> bool:
+    hw = node.get("hardware") or ""
+    arch = node.get("arch") or ""
+    for f in filters:
+        if f == "rpi" and hw in ("rpi4", "rpi5"):
+            return True
+        if f == "arm64" and arch == "arm64":
+            return True
+        if hw == f:
+            return True
+        if f == "amd64" and arch == "amd64":
+            return True
+    return False
def _node_roles(labels: dict[str, Any]) -> list[str]:
    roles: list[str] = []
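A small worked example of the node scoping added in this hunk: _apply_node_filter rewrites the node_uname_info{nodename!=""} selector so a panel expression only matches a scoped set of nodes. The expression and node names below are invented for illustration:

# Illustration of the node scoping used above: the selector gains a nodename
# regex built from the scoped inventory names.
expr = 'avg by (node) (up * on(instance) group_left(node) node_uname_info{nodename!=""})'
node_regex = "titan-0a|titan-0b"  # "|".join of the scoped node names (invented)
needle = 'node_uname_info{nodename!=""}'
replacement = f'node_uname_info{{nodename!="",nodename=~"{node_regex}"}}'
print(expr.replace(needle, replacement))
# -> avg by (node) (up * on(instance) group_left(node) node_uname_info{nodename!="",nodename=~"titan-0a|titan-0b"})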
@@ -495,176 +577,103 @@ def _inventory_sets(inventory: list[dict[str, Any]]) -> dict[str, Any]:
def structured_answer(prompt: str, *, inventory: list[dict[str, Any]], metrics_summary: str) -> str:
    q = normalize_query(prompt)
-    if metrics_summary and any(word in q for word in ("postgres", "connection", "connections", "db")):
-        return metrics_summary
-    if not inventory:
-        return ""
-    sets = _inventory_sets(inventory)
-    names = sets["names"]
-    ready = sets["ready"]
-    not_ready = sets["not_ready"]
-    groups = sets["groups"]
-    worker_names = sets["worker_names"]
-    worker_ready = sets["worker_ready"]
-    worker_not_ready = sets["worker_not_ready"]
-    expected_workers = sets["expected_workers"]
-    expected_ready = sets["expected_ready"]
-    expected_not_ready = sets["expected_not_ready"]
-    expected_missing = sets["expected_missing"]
-    total = len(names)
+    if not q:
+        return ""
+    tokens = _tokens(q)
+    op = _detect_operation(q)
+    metric = _detect_metric(q)
+    entity = _detect_entity(q)
+    include_hw, exclude_hw = _detect_hardware_filters(q)
    nodes_in_query = _extract_titan_nodes(q)
-    rpi_nodes = set(groups.get("rpi4", [])) | set(groups.get("rpi5", []))
-    non_rpi = set(groups.get("jetson", [])) | set(groups.get("amd64", []))
-    unknown_hw = set(groups.get("arm64-unknown", [])) | set(groups.get("unknown", []))
-    if "hottest" in q or "hot" in q:
-        filter_nodes: list[str] | None = None
-        if "amd64" in q or "x86" in q:
-            filter_nodes = sorted(groups.get("amd64", []))
-        elif "jetson" in q:
-            filter_nodes = sorted(groups.get("jetson", []))
-        elif "raspberry" in q or "rpi" in q:
-            filter_nodes = sorted(rpi_nodes)
-        elif "arm64" in q:
-            filter_nodes = sorted([n for n in names if n not in groups.get("amd64", [])])
-        hottest = _hottest_answer(q, nodes=filter_nodes)
-        if hottest:
-            return hottest
-        return "Unable to determine hottest nodes right now (metrics unavailable)."
-    if nodes_in_query and ("raspberry" in q or "rpi" in q):
-        parts: list[str] = []
-        for node in nodes_in_query:
-            if node in rpi_nodes:
-                parts.append(f"{node} is a Raspberry Pi node.")
-            elif node in non_rpi:
-                parts.append(f"{node} is not a Raspberry Pi node.")
-            elif node in names:
-                parts.append(f"{node} is in Atlas but hardware is unknown.")
-            else:
-                parts.append(f"{node} is not in the Atlas cluster.")
-        return " ".join(parts)
-    if nodes_in_query and "jetson" in q:
-        jets = set(groups.get("jetson", []))
-        parts = []
-        for node in nodes_in_query:
-            if node in jets:
-                parts.append(f"{node} is a Jetson node.")
-            elif node in names:
-                parts.append(f"{node} is not a Jetson node.")
-            else:
-                parts.append(f"{node} is not in the Atlas cluster.")
-        return " ".join(parts)
-    if nodes_in_query and ("is" in q or "part of" in q or "in atlas" in q or "in cluster" in q or "present" in q or "exist" in q):
-        parts: list[str] = []
-        for node in nodes_in_query:
-            if node in names:
-                parts.append(f"Yes. {node} is in the Atlas cluster.")
-            else:
-                parts.append(f"No. {node} is not in the Atlas cluster.")
-        return " ".join(parts)
-    if any(term in q for term in ("non-raspberry", "non raspberry", "not raspberry", "non-rpi", "non rpi")):
-        non_rpi_sorted = sorted(non_rpi)
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(non_rpi_sorted)} non-Raspberry Pi nodes."
-        if any(phrase in q for phrase in ("besides jetson", "excluding jetson", "without jetson", "non jetson")):
-            amd = sorted(groups.get("amd64", []))
-            return f"Non-Raspberry Pi nodes (excluding Jetson): {', '.join(amd)}." if amd else "No non-Raspberry Pi nodes outside Jetson."
-        return f"Non-Raspberry Pi nodes: {', '.join(non_rpi_sorted)}." if non_rpi_sorted else "No non-Raspberry Pi nodes found."
-    if "jetson" in q:
-        jets = groups.get("jetson", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(jets)} Jetson nodes."
-        return f"Jetson nodes: {', '.join(jets)}." if jets else "No Jetson nodes found."
-    if "amd64" in q or "x86" in q:
-        amd = groups.get("amd64", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(amd)} amd64 nodes."
-        return f"amd64 nodes: {', '.join(amd)}." if amd else "No amd64 nodes found."
-    if "arm64" in q and "node" in q and any(word in q for word in ("how many", "count", "number")):
-        count = sum(1 for node in inventory if node.get("arch") == "arm64")
-        return f"Atlas has {count} arm64 nodes."
-    if "rpi4" in q:
-        rpi4 = groups.get("rpi4", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi4)} rpi4 nodes."
-        return f"rpi4 nodes: {', '.join(rpi4)}." if rpi4 else "No rpi4 nodes found."
-    if "rpi5" in q:
-        rpi5 = groups.get("rpi5", [])
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi5)} rpi5 nodes."
-        return f"rpi5 nodes: {', '.join(rpi5)}." if rpi5 else "No rpi5 nodes found."
-    if "raspberry" in q or "rpi" in q:
-        rpi = sorted(rpi_nodes)
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Atlas has {len(rpi)} Raspberry Pi nodes."
-        return f"Raspberry Pi nodes: {', '.join(rpi)}." if rpi else "No Raspberry Pi nodes found."
-    if "arm64-unknown" in q or "unknown" in q or "no hardware" in q:
-        unknown = sorted(unknown_hw)
-        return f"Unknown hardware nodes: {', '.join(unknown)}." if unknown else "No unknown hardware labels."
-    if ("notready" in q or "not ready" in q or "unready" in q) and ("node" in q or "nodes" in q):
-        return "Not ready nodes: " + (", ".join(not_ready) if not_ready else "none") + "."
-    if "worker" in q and ("node" in q or "nodes" in q or "workers" in q):
-        not_ready_query = "not ready" in q or "unready" in q or "down" in q or ("not" in q and "ready" in q)
-        if expected_workers:
-            if "missing" in q:
-                return "Missing worker nodes: " + (", ".join(expected_missing) if expected_missing else "none") + "."
-            if "ready" in q and ("not ready" in q or "vs" in q or "versus" in q):
-                return (
-                    f"Expected workers: {len(expected_ready)} ready, "
-                    f"{len(expected_not_ready)} not ready (expected {len(expected_workers)})."
-                )
-            if any(word in q for word in ("how many", "count", "number")) and ("expect" in q or "expected" in q or "should" in q):
-                msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
-                if expected_missing:
-                    msg += f" Missing: {', '.join(expected_missing)}."
-                return msg
-            if not_ready_query:
-                if expected_not_ready or expected_missing:
-                    detail = []
-                    if expected_not_ready:
-                        detail.append(f"Not ready: {', '.join(expected_not_ready)}")
-                    if expected_missing:
-                        detail.append(f"Missing: {', '.join(expected_missing)}")
-                    return "Worker nodes needing attention. " + " ".join(detail) + "."
-                return "All expected worker nodes are Ready."
-            if any(word in q for word in ("expected", "expect", "should")):
-                msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
-                if expected_missing:
-                    msg += f" Missing: {', '.join(expected_missing)}."
-                return msg
-            if any(word in q for word in ("how many", "count", "number")):
-                return f"Worker nodes: {len(expected_ready)} ready, {len(expected_not_ready)} not ready (expected {len(expected_workers)})."
-            if "ready" in q:
-                return f"Ready worker nodes ({len(expected_ready)}): {', '.join(expected_ready)}."
-        if not_ready_query:
-            return "Worker nodes not ready: " + (", ".join(worker_not_ready) if worker_not_ready else "none") + "."
-        if any(word in q for word in ("how many", "count", "number")):
-            return f"Worker nodes: {len(worker_ready)} ready, {len(worker_not_ready)} not ready."
-        return "Ready worker nodes ({}): {}.".format(len(worker_ready), ", ".join(worker_ready))
-    if any(word in q for word in ("how many", "count", "number")) and "node" in q:
-        return f"Atlas has {total} nodes; {len(ready)} ready, {len(not_ready)} not ready."
-    if "node names" in q or ("nodes" in q and "named" in q) or "naming" in q:
-        return "Atlas node names: " + ", ".join(names) + "."
-    if "ready" in q and "node" in q:
-        return f"Ready nodes ({len(ready)}): {', '.join(ready)}."
+    only_workers = "worker" in q or "workers" in q
+    only_ready: bool | None = None
+    if "not ready" in q or "unready" in q or "down" in q or "missing" in q:
+        only_ready = False
+    elif "ready" in q:
+        only_ready = True
+    if entity == "node" and only_ready is not None and op != "count":
+        op = "status"
+    if not op and entity == "node":
+        op = "list" if (include_hw or exclude_hw or nodes_in_query) else "count"
+    if op == "top" and metric is None:
+        metric = "cpu"
+    # Metrics-first when a metric or top operation is requested.
+    if metric or op == "top":
+        entry = _select_metric_entry(tokens, metric=metric, op=op)
+        if entry and isinstance(entry.get("exprs"), list) and entry["exprs"]:
+            expr = entry["exprs"][0]
+            if inventory:
+                scoped = _inventory_filter(
+                    inventory,
+                    include_hw=include_hw,
+                    exclude_hw=exclude_hw,
+                    only_workers=only_workers,
+                    only_ready=None,
+                    nodes_in_query=nodes_in_query,
+                )
+                if scoped:
+                    node_regex = "|".join([n["name"] for n in scoped])
+                    expr = _apply_node_filter(expr, node_regex)
+            res = vm_query(expr, timeout=20)
+            answer = _format_metric_answer(entry, res)
+            if answer:
+                return answer
+        if metrics_summary:
+            return metrics_summary
+    if entity != "node" or not inventory:
+        if any(word in q for word in METRIC_HINT_WORDS) and not metrics_summary:
+            return "I don't have data to answer that right now."
+        return ""
+    expected_workers = expected_worker_nodes_from_metrics()
+    filtered = _inventory_filter(
+        inventory,
+        include_hw=include_hw,
+        exclude_hw=exclude_hw,
+        only_workers=only_workers,
+        only_ready=only_ready if op in ("status", "count") else None,
+        nodes_in_query=nodes_in_query,
+    )
+    names = [node["name"] for node in filtered]
+    if op == "status":
+        if "missing" in q and expected_workers:
+            missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
+            return "Missing nodes: " + (", ".join(missing) if missing else "none") + "."
+        if only_ready is False:
+            return "Not ready nodes: " + (", ".join(names) if names else "none") + "."
+        if only_ready is True:
+            return f"Ready nodes ({len(names)}): " + (", ".join(names) if names else "none") + "."
+    if op == "count":
+        if expected_workers and ("expected" in q or "should" in q):
+            missing = sorted(set(expected_workers) - {n["name"] for n in inventory})
+            msg = f"Grafana inventory expects {len(expected_workers)} worker nodes."
+            if missing:
+                msg += f" Missing: {', '.join(missing)}."
+            return msg
+        if not (include_hw or exclude_hw or nodes_in_query or only_workers):
+            return f"Atlas has {len(names)} nodes."
+        return f"Matching nodes: {len(names)}."
+    if op == "list":
+        if nodes_in_query:
+            parts = []
+            existing = {n["name"] for n in inventory}
+            for node in nodes_in_query:
+                parts.append(f"{node}: {'present' if node in existing else 'not present'}")
+            return "Node presence: " + ", ".join(parts) + "."
+        if not names:
+            return "Matching nodes: none."
+        shown = names[:30]
+        suffix = f", … (+{len(names) - 30} more)" if len(names) > 30 else ""
+        return "Matching nodes: " + ", ".join(shown) + suffix + "."
return "" return ""
@@ -727,25 +736,6 @@ def metrics_query_context(prompt: str, *, allow_tools: bool) -> tuple[str, str]:
    fallback = _metrics_fallback_summary(panel, summary)
    return context, fallback
-def jetson_nodes_from_kb() -> list[str]:
-    for doc in KB.get("runbooks", []):
-        if not isinstance(doc, dict):
-            continue
-        body = str(doc.get("body") or "")
-        for line in body.splitlines():
-            if "jetson" not in line.lower():
-                continue
-            names = _extract_titan_nodes(line)
-            if names:
-                return names
-    return []
-
-def jetson_nodes_summary(cluster_name: str) -> str:
-    names = jetson_nodes_from_kb()
-    if names:
-        return f"{cluster_name} has {len(names)} Jetson nodes: {', '.join(names)}."
-    return ""
def catalog_hints(query: str) -> tuple[str, list[tuple[str, str]]]:
    q = (query or "").strip()
    if not q or not KB.get("catalog"):
@@ -953,22 +943,16 @@ def _parse_metric_lines(summary: str) -> dict[str, str]:
def _metrics_fallback_summary(panel: str, summary: str) -> str:
    parsed = _parse_metric_lines(summary)
    panel_l = (panel or "").lower()
-    if panel_l.startswith("postgres connections"):
-        used = parsed.get("conn=used")
-        maxv = parsed.get("conn=max")
-        if used and maxv:
-            try:
-                used_i = int(float(used))
-                max_i = int(float(maxv))
-            except ValueError:
-                return f"Postgres connections: {summary}"
-            free = max_i - used_i
-            return f"Postgres connections: {used_i}/{max_i} used ({free} free)."
-    if panel_l.startswith("postgres hottest"):
-        if parsed:
-            label, value = next(iter(parsed.items()))
-            return f"Most Postgres connections: {label} = {value}."
-    return f"{panel}: {summary}"
+    if parsed:
+        items = list(parsed.items())
+        if len(items) == 1:
+            label, value = items[0]
+            return f"{panel}: {label} = {value}."
+        compact = "; ".join(f"{k}={v}" for k, v in items)
+        return f"{panel}: {compact}."
+    if panel_l:
+        return f"{panel}: {summary}"
+    return summary
def _node_ready_status(node: dict) -> bool | None:
    conditions = node.get("status", {}).get("conditions") or []
@@ -1075,93 +1059,6 @@ def vm_cluster_snapshot() -> str:
        parts.append(pr)
    return "\n".join(parts).strip()
-def nodes_summary(cluster_name: str) -> str:
-    state = _ariadne_state()
-    if state:
-        nodes = state.get("nodes") if isinstance(state.get("nodes"), dict) else {}
-        total = nodes.get("total")
-        ready = nodes.get("ready")
-        not_ready = nodes.get("not_ready")
-        if isinstance(total, int) and isinstance(ready, int):
-            not_ready = not_ready if isinstance(not_ready, int) else max(total - ready, 0)
-            if not_ready:
-                return f"{cluster_name} cluster has {total} nodes: {ready} Ready, {not_ready} NotReady."
-            return f"{cluster_name} cluster has {total} nodes, all Ready."
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    total = len(items)
-    ready = 0
-    for node in items:
-        conditions = node.get("status", {}).get("conditions") or []
-        for cond in conditions if isinstance(conditions, list) else []:
-            if cond.get("type") == "Ready":
-                if cond.get("status") == "True":
-                    ready += 1
-                break
-    not_ready = max(total - ready, 0)
-    if not_ready:
-        return f"{cluster_name} cluster has {total} nodes: {ready} Ready, {not_ready} NotReady."
-    return f"{cluster_name} cluster has {total} nodes, all Ready."
-
-def nodes_names_summary(cluster_name: str) -> str:
-    state = _ariadne_state()
-    if state:
-        nodes = state.get("nodes") if isinstance(state.get("nodes"), dict) else {}
-        names = nodes.get("names")
-        if isinstance(names, list) and names:
-            cleaned = sorted({str(n) for n in names if n})
-            if len(cleaned) <= 30:
-                return f"{cluster_name} node names: {', '.join(cleaned)}."
-            shown = ", ".join(cleaned[:30])
-            return f"{cluster_name} node names: {shown}, … (+{len(cleaned) - 30} more)."
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    names = []
-    for node in items:
-        name = (node.get("metadata") or {}).get("name") or ""
-        if name:
-            names.append(name)
-    names = sorted(set(names))
-    if not names:
-        return ""
-    if len(names) <= 30:
-        return f"{cluster_name} node names: {', '.join(names)}."
-    shown = ", ".join(names[:30])
-    return f"{cluster_name} node names: {shown}, … (+{len(names) - 30} more)."
-
-def nodes_arch_summary(cluster_name: str, arch: str) -> str:
-    try:
-        data = k8s_get("/api/v1/nodes?limit=500")
-    except Exception:
-        return ""
-    items = data.get("items") or []
-    if not isinstance(items, list) or not items:
-        return ""
-    normalized = (arch or "").strip().lower()
-    if normalized in ("aarch64", "arm64"):
-        arch_label = "arm64"
-    elif normalized in ("x86_64", "x86-64", "amd64"):
-        arch_label = "amd64"
-    else:
-        arch_label = normalized
-    total = 0
-    for node in items:
-        labels = (node.get("metadata") or {}).get("labels") or {}
-        if labels.get("kubernetes.io/arch") == arch_label:
-            total += 1
-    return f"{cluster_name} cluster has {total} {arch_label} nodes."
def _strip_code_fence(text: str) -> str:
    cleaned = (text or "").strip()
    match = CODE_FENCE_RE.match(cleaned)