From e82cca7cd5ee5949133d5f92b5cbe93d967eac98 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 28 Jan 2026 19:57:36 -0300 Subject: [PATCH] atlasbot: enrich snapshot summary context --- atlasbot/snapshot/builder.py | 86 +++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/atlasbot/snapshot/builder.py b/atlasbot/snapshot/builder.py index 86c5baa..077b381 100644 --- a/atlasbot/snapshot/builder.py +++ b/atlasbot/snapshot/builder.py @@ -62,6 +62,10 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]: metrics = _metrics(snapshot) summary: dict[str, Any] = {} + if isinstance(snapshot.get("nodes_summary"), dict): + summary["nodes_summary"] = snapshot.get("nodes_summary") + if metrics: + summary["metrics"] = metrics summary.update(_build_nodes(snapshot)) summary.update(_build_hardware(nodes_detail)) summary.update(_build_pods(metrics)) @@ -164,6 +168,25 @@ def _format_float(value: Any) -> str: return f"{numeric:.2f}".rstrip("0").rstrip(".") +def _format_rate_bytes(value: Any) -> str: + try: + numeric = float(value) + except (TypeError, ValueError): + return str(value) + if numeric >= 1024 * 1024: + return f"{numeric / (1024 * 1024):.2f} MB/s" + if numeric >= 1024: + return f"{numeric / 1024:.2f} KB/s" + return f"{numeric:.2f} B/s" + + +def _format_kv_map(values: dict[str, Any]) -> str: + parts = [] + for key, value in values.items(): + parts.append(f"{key}={value}") + return ", ".join(parts) + + def _format_names(names: list[str]) -> str: if not names: return "" @@ -178,13 +201,32 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {} if nodes: + workers = {} + if isinstance(summary.get("nodes_summary"), dict): + workers = summary["nodes_summary"].get("workers") or {} + workers_total = workers.get("total") + workers_ready = workers.get("ready") + workers_str = "" + if workers_total is not None and workers_ready is not None: + workers_str = f", workers_ready={workers_ready}/{workers_total}" lines.append( - "nodes: total={total}, ready={ready}, not_ready={not_ready}".format( + "nodes: total={total}, ready={ready}, not_ready={not_ready}{workers}".format( total=nodes.get("total"), ready=nodes.get("ready"), not_ready=nodes.get("not_ready"), + workers=workers_str, ) ) + if isinstance(summary.get("nodes_summary"), dict): + not_ready_names = summary["nodes_summary"].get("not_ready_names") or [] + if not_ready_names: + lines.append("nodes_not_ready: " + _format_names(not_ready_names)) + by_arch = summary["nodes_summary"].get("by_arch") or {} + if isinstance(by_arch, dict) and by_arch: + lines.append("archs: " + _format_kv_map(by_arch)) + by_role = summary["nodes_summary"].get("by_role") or {} + if isinstance(by_role, dict) and by_role: + lines.append("roles: " + _format_kv_map(by_role)) hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {} if hardware: @@ -210,6 +252,22 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: succeeded=pods.get("succeeded"), ) ) + if isinstance(summary.get("metrics"), dict): + top_restarts = summary["metrics"].get("top_restarts_1h") or [] + if isinstance(top_restarts, list) and top_restarts: + parts = [] + for entry in top_restarts: + metric = entry.get("metric") if isinstance(entry, dict) else {} + value = entry.get("value") if isinstance(entry, dict) else [] + if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < 2: + continue + namespace = metric.get("namespace") + pod = metric.get("pod") + count = _format_float(value[1]) + if namespace and pod: + parts.append(f"{namespace}/{pod}={count}") + if parts: + lines.append("restarts_1h_top: " + "; ".join(parts)) postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {} if postgres: @@ -229,7 +287,10 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: if not isinstance(entry, dict): continue node = entry.get("node") - value = _format_float(entry.get("value")) + if key in {"net", "io"}: + value = _format_rate_bytes(entry.get("value")) + else: + value = _format_float(entry.get("value")) if node: parts.append(f"{key}={node} ({value})") if parts: @@ -238,6 +299,24 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: workloads = summary.get("workloads") if isinstance(workloads, list) and workloads: lines.append(f"workloads: total={len(workloads)}") + top_workloads = sorted( + (item for item in workloads if isinstance(item, dict)), + key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""), + )[:5] + if top_workloads: + parts = [] + for item in top_workloads: + namespace = item.get("namespace") + name = item.get("workload") + pods_total = item.get("pods_total") + primary = item.get("primary_node") + if namespace and name: + label = f"{namespace}/{name}={pods_total}" + if primary: + label = f"{label} (primary={primary})" + parts.append(label) + if parts: + lines.append("workloads_top: " + "; ".join(parts)) flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {} if flux: @@ -245,4 +324,7 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: if not_ready is not None: lines.append(f"flux_not_ready: {not_ready}") + lines.append("units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec") + lines.append("windows: rates=5m, restarts=1h") + return "\n".join(lines)