From daf17968e90d7f8269623313299b25039e23ca59 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Thu, 29 Jan 2026 02:32:08 -0300
Subject: [PATCH] atlasbot: include events and pvc usage in snapshot

---
Review notes (ignored by git-am):
- _append_pvc_usage skips non-dict entries and non-dict "metric" values
  instead of raising AttributeError.
- _append_events ignores non-numeric reason counts before sorting.
- The post-image blob id on the "index" line predates these fixes.

 atlasbot/snapshot/builder.py | 63 ++++++++++++++++++++++++++++++++++--
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/atlasbot/snapshot/builder.py b/atlasbot/snapshot/builder.py
index 2a82833..6b99a37 100644
--- a/atlasbot/snapshot/builder.py
+++ b/atlasbot/snapshot/builder.py
@@ -80,8 +80,10 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
     summary.update(_build_namespace_nodes(snapshot))
     summary.update(_build_node_pods(snapshot))
     summary.update(_build_pod_issues(snapshot))
+    summary.update(_build_events(snapshot))
     summary.update(_build_postgres(metrics))
     summary.update(_build_hottest(metrics))
+    summary.update(_build_pvc(metrics))
     summary.update(_build_workloads(snapshot))
     summary.update(_build_flux(snapshot))
     return summary
@@ -188,6 +190,14 @@ def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
     return {"pod_issues": pod_issues}
 
 
+def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
+    """Return ``{"events": ...}`` when the snapshot holds a non-empty events dict, else ``{}``."""
+    events = snapshot.get("events")
+    if not isinstance(events, dict) or not events:
+        return {}
+    return {"events": events}
+
+
 def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
     postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
     if not postgres:
@@ -204,7 +214,7 @@ def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
 def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
     node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
     hottest: dict[str, Any] = {}
-    for key in ("cpu", "ram", "net", "io"):
+    for key in ("cpu", "ram", "net", "io", "disk"):
         top = _node_usage_top(node_usage.get(key, []))
         if top:
             hottest[key] = top
@@ -213,6 +223,14 @@ def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
     return {"hottest": hottest}
 
 
+def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
+    """Return ``{"pvc_usage_top": ...}`` when the metric is a non-empty list, else ``{}``."""
+    pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
+    if not pvc_usage:
+        return {}
+    return {"pvc_usage_top": pvc_usage}
+
+
 def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
     workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
     return {"workloads": workloads}
@@ -486,7 +504,7 @@ def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
     if not stats:
         return
     parts = []
-    for key in ("cpu", "ram", "net", "io"):
+    for key in ("cpu", "ram", "net", "io", "disk"):
         entry = stats.get(key) if isinstance(stats.get(key), dict) else {}
         avg = entry.get("avg")
         if avg is None:
@@ -500,6 +518,45 @@ def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
         lines.append("node_usage_avg: " + "; ".join(parts))
 
 
+def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
+    """Append a ``warnings:`` line with the total and up to three top reasons by count."""
+    events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
+    if not events:
+        return
+    total = events.get("warnings_total")
+    by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
+    if total is None:
+        return
+    # Keep only numeric counts so the descending sort cannot raise TypeError on malformed data.
+    counts = {reason: count for reason, count in by_reason.items() if isinstance(count, (int, float))}
+    if counts:
+        top = sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:3]
+        reasons = "; ".join([f"{reason}={count}" for reason, count in top])
+        lines.append(f"warnings: total={total}; top={reasons}")
+    else:
+        lines.append(f"warnings: total={total}")
+
+
+def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
+    """Append a ``pvc_usage_top:`` line with one ``namespace/pvc=value%`` part per usable entry."""
+    pvc_usage = summary.get("pvc_usage_top")
+    if not isinstance(pvc_usage, list) or not pvc_usage:
+        return
+    parts = []
+    for entry in pvc_usage:
+        # Skip malformed entries rather than raising AttributeError on .get().
+        if not isinstance(entry, dict):
+            continue
+        metric = entry.get("metric") if isinstance(entry.get("metric"), dict) else {}
+        namespace = metric.get("namespace")
+        pvc = metric.get("persistentvolumeclaim")
+        value = entry.get("value")
+        if namespace and pvc:
+            parts.append(f"{namespace}/{pvc}={_format_float(value)}%")
+    if parts:
+        lines.append("pvc_usage_top: " + "; ".join(parts))
+
+
 def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
     metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
     cpu_top = metrics.get("namespace_cpu_top") if isinstance(metrics.get("namespace_cpu_top"), list) else []
@@ -642,11 +699,13 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
     _append_namespace_nodes(lines, summary)
     _append_node_pods(lines, summary)
     _append_pod_issues(lines, summary)
+    _append_events(lines, summary)
     _append_node_usage_stats(lines, summary)
     _append_namespace_usage(lines, summary)
     _append_restarts(lines, summary)
     _append_postgres(lines, summary)
     _append_hottest(lines, summary)
+    _append_pvc_usage(lines, summary)
     _append_workloads(lines, summary)
     _append_flux(lines, summary)
     _append_units_windows(lines, summary)