atlasbot: include events and pvc usage in snapshot

This commit is contained in:
Brad Stein 2026-01-29 02:32:08 -03:00
parent bdebddada4
commit daf17968e9

View File

@ -80,8 +80,10 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
summary.update(_build_namespace_nodes(snapshot)) summary.update(_build_namespace_nodes(snapshot))
summary.update(_build_node_pods(snapshot)) summary.update(_build_node_pods(snapshot))
summary.update(_build_pod_issues(snapshot)) summary.update(_build_pod_issues(snapshot))
summary.update(_build_events(snapshot))
summary.update(_build_postgres(metrics)) summary.update(_build_postgres(metrics))
summary.update(_build_hottest(metrics)) summary.update(_build_hottest(metrics))
summary.update(_build_pvc(metrics))
summary.update(_build_workloads(snapshot)) summary.update(_build_workloads(snapshot))
summary.update(_build_flux(snapshot)) summary.update(_build_flux(snapshot))
return summary return summary
@ -188,6 +190,13 @@ def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
return {"pod_issues": pod_issues} return {"pod_issues": pod_issues}
def _build_events(snapshot: dict[str, Any]) -> dict[str, Any]:
events = snapshot.get("events")
if not isinstance(events, dict) or not events:
return {}
return {"events": events}
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]: def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {} postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
if not postgres: if not postgres:
@ -204,7 +213,7 @@ def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]: def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {} node_usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
hottest: dict[str, Any] = {} hottest: dict[str, Any] = {}
for key in ("cpu", "ram", "net", "io"): for key in ("cpu", "ram", "net", "io", "disk"):
top = _node_usage_top(node_usage.get(key, [])) top = _node_usage_top(node_usage.get(key, []))
if top: if top:
hottest[key] = top hottest[key] = top
@ -213,6 +222,13 @@ def _build_hottest(metrics: dict[str, Any]) -> dict[str, Any]:
return {"hottest": hottest} return {"hottest": hottest}
def _build_pvc(metrics: dict[str, Any]) -> dict[str, Any]:
pvc_usage = metrics.get("pvc_usage_top") if isinstance(metrics.get("pvc_usage_top"), list) else []
if not pvc_usage:
return {}
return {"pvc_usage_top": pvc_usage}
def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]: def _build_workloads(snapshot: dict[str, Any]) -> dict[str, Any]:
workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else [] workloads = snapshot.get("workloads") if isinstance(snapshot.get("workloads"), list) else []
return {"workloads": workloads} return {"workloads": workloads}
@ -486,7 +502,7 @@ def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
if not stats: if not stats:
return return
parts = [] parts = []
for key in ("cpu", "ram", "net", "io"): for key in ("cpu", "ram", "net", "io", "disk"):
entry = stats.get(key) if isinstance(stats.get(key), dict) else {} entry = stats.get(key) if isinstance(stats.get(key), dict) else {}
avg = entry.get("avg") avg = entry.get("avg")
if avg is None: if avg is None:
@ -500,6 +516,38 @@ def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
lines.append("node_usage_avg: " + "; ".join(parts)) lines.append("node_usage_avg: " + "; ".join(parts))
def _append_events(lines: list[str], summary: dict[str, Any]) -> None:
events = summary.get("events") if isinstance(summary.get("events"), dict) else {}
if not events:
return
total = events.get("warnings_total")
by_reason = events.get("warnings_by_reason") if isinstance(events.get("warnings_by_reason"), dict) else {}
if total is None:
return
if by_reason:
top = sorted(by_reason.items(), key=lambda item: (-item[1], item[0]))[:3]
reasons = "; ".join([f"{reason}={count}" for reason, count in top])
lines.append(f"warnings: total={total}; top={reasons}")
else:
lines.append(f"warnings: total={total}")
def _append_pvc_usage(lines: list[str], summary: dict[str, Any]) -> None:
pvc_usage = summary.get("pvc_usage_top")
if not isinstance(pvc_usage, list) or not pvc_usage:
return
parts = []
for entry in pvc_usage:
metric = entry.get("metric") if isinstance(entry, dict) else {}
namespace = metric.get("namespace")
pvc = metric.get("persistentvolumeclaim")
value = entry.get("value")
if namespace and pvc:
parts.append(f"{namespace}/{pvc}={_format_float(value)}%")
if parts:
lines.append("pvc_usage_top: " + "; ".join(parts))
def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None: def _append_namespace_usage(lines: list[str], summary: dict[str, Any]) -> None:
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {} metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
cpu_top = metrics.get("namespace_cpu_top") if isinstance(metrics.get("namespace_cpu_top"), list) else [] cpu_top = metrics.get("namespace_cpu_top") if isinstance(metrics.get("namespace_cpu_top"), list) else []
@ -642,11 +690,13 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
_append_namespace_nodes(lines, summary) _append_namespace_nodes(lines, summary)
_append_node_pods(lines, summary) _append_node_pods(lines, summary)
_append_pod_issues(lines, summary) _append_pod_issues(lines, summary)
_append_events(lines, summary)
_append_node_usage_stats(lines, summary) _append_node_usage_stats(lines, summary)
_append_namespace_usage(lines, summary) _append_namespace_usage(lines, summary)
_append_restarts(lines, summary) _append_restarts(lines, summary)
_append_postgres(lines, summary) _append_postgres(lines, summary)
_append_hottest(lines, summary) _append_hottest(lines, summary)
_append_pvc_usage(lines, summary)
_append_workloads(lines, summary) _append_workloads(lines, summary)
_append_flux(lines, summary) _append_flux(lines, summary)
_append_units_windows(lines, summary) _append_units_windows(lines, summary)