snapshot: add pressure, capacity, and pod issues

This commit is contained in:
Brad Stein 2026-01-29 01:53:29 -03:00
parent 5bb69a9109
commit bdebddada4

View File

@ -72,11 +72,14 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
if metrics:
summary["metrics"] = metrics
summary.update(_build_nodes(snapshot))
summary.update(_build_pressure(snapshot))
summary.update(_build_hardware(nodes_detail))
summary.update(_build_capacity(metrics))
summary.update(_build_pods(metrics))
summary.update(_build_namespace_pods(snapshot))
summary.update(_build_namespace_nodes(snapshot))
summary.update(_build_node_pods(snapshot))
summary.update(_build_pod_issues(snapshot))
summary.update(_build_postgres(metrics))
summary.update(_build_hottest(metrics))
summary.update(_build_workloads(snapshot))
@ -107,6 +110,14 @@ def _build_nodes(snapshot: dict[str, Any]) -> dict[str, Any]:
}
def _build_pressure(snapshot: dict[str, Any]) -> dict[str, Any]:
nodes_summary = snapshot.get("nodes_summary") if isinstance(snapshot.get("nodes_summary"), dict) else {}
pressure = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary.get("pressure_nodes"), dict) else {}
if not pressure:
return {}
return {"pressure_nodes": pressure}
def _build_hardware(nodes_detail: list[dict[str, Any]]) -> dict[str, Any]:
hardware: dict[str, list[str]] = {}
for node in nodes_detail or []:
@ -133,6 +144,22 @@ def _build_pods(metrics: dict[str, Any]) -> dict[str, Any]:
return {"pods": pods}
def _build_capacity(metrics: dict[str, Any]) -> dict[str, Any]:
if not metrics:
return {}
capacity = {
"cpu": metrics.get("capacity_cpu"),
"allocatable_cpu": metrics.get("allocatable_cpu"),
"mem_bytes": metrics.get("capacity_mem_bytes"),
"allocatable_mem_bytes": metrics.get("allocatable_mem_bytes"),
"pods": metrics.get("capacity_pods"),
"allocatable_pods": metrics.get("allocatable_pods"),
}
if not any(value is not None for value in capacity.values()):
return {}
return {"capacity": capacity}
def _build_namespace_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
namespaces = snapshot.get("namespace_pods")
if not isinstance(namespaces, list) or not namespaces:
@ -154,6 +181,13 @@ def _build_node_pods(snapshot: dict[str, Any]) -> dict[str, Any]:
return {"node_pods": node_pods}
def _build_pod_issues(snapshot: dict[str, Any]) -> dict[str, Any]:
pod_issues = snapshot.get("pod_issues")
if not isinstance(pod_issues, dict) or not pod_issues:
return {}
return {"pod_issues": pod_issues}
def _build_postgres(metrics: dict[str, Any]) -> dict[str, Any]:
postgres = metrics.get("postgres_connections") if isinstance(metrics.get("postgres_connections"), dict) else {}
if not postgres:
@ -286,6 +320,20 @@ def _append_hardware(lines: list[str], summary: dict[str, Any]) -> None:
lines.append("hardware: " + "; ".join(sorted(parts)))
def _append_pressure(lines: list[str], summary: dict[str, Any]) -> None:
pressure = summary.get("pressure_nodes")
if not isinstance(pressure, dict) or not pressure:
return
parts = []
for cond, nodes in sorted(pressure.items()):
if not nodes:
continue
name_list = _format_names([str(name) for name in nodes if name])
parts.append(f"{cond}={len(nodes)} ({name_list})" if name_list else f"{cond}={len(nodes)}")
if parts:
lines.append("node_pressure: " + "; ".join(parts))
def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
pods = summary.get("pods") if isinstance(summary.get("pods"), dict) else {}
if not pods:
@ -300,6 +348,27 @@ def _append_pods(lines: list[str], summary: dict[str, Any]) -> None:
)
def _append_capacity(lines: list[str], summary: dict[str, Any]) -> None:
capacity = summary.get("capacity") if isinstance(summary.get("capacity"), dict) else {}
if not capacity:
return
parts = []
if capacity.get("cpu") is not None:
parts.append(f"cpu={_format_float(capacity.get('cpu'))}")
if capacity.get("allocatable_cpu") is not None:
parts.append(f"alloc_cpu={_format_float(capacity.get('allocatable_cpu'))}")
if capacity.get("mem_bytes") is not None:
parts.append(f"mem={_format_bytes(capacity.get('mem_bytes'))}")
if capacity.get("allocatable_mem_bytes") is not None:
parts.append(f"alloc_mem={_format_bytes(capacity.get('allocatable_mem_bytes'))}")
if capacity.get("pods") is not None:
parts.append(f"pods={_format_float(capacity.get('pods'))}")
if capacity.get("allocatable_pods") is not None:
parts.append(f"alloc_pods={_format_float(capacity.get('allocatable_pods'))}")
if parts:
lines.append("capacity: " + "; ".join(parts))
def _append_namespace_pods(lines: list[str], summary: dict[str, Any]) -> None:
namespaces = summary.get("namespace_pods")
if not isinstance(namespaces, list) or not namespaces:
@ -370,6 +439,47 @@ def _append_node_pods(lines: list[str], summary: dict[str, Any]) -> None:
lines.append("node_pods_top: " + "; ".join(parts))
def _append_pod_issues(lines: list[str], summary: dict[str, Any]) -> None:
pod_issues = summary.get("pod_issues") if isinstance(summary.get("pod_issues"), dict) else {}
if not pod_issues:
return
counts_line = _format_pod_issue_counts(pod_issues)
if counts_line:
lines.append(counts_line)
top_line = _format_pod_issue_top(pod_issues)
if top_line:
lines.append(top_line)
def _format_pod_issue_counts(pod_issues: dict[str, Any]) -> str:
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
if not counts:
return ""
parts = []
for key in ("Failed", "Pending", "Unknown"):
if key in counts:
parts.append(f"{key}={counts.get(key)}")
return "pod_issues: " + "; ".join(parts) if parts else ""
def _format_pod_issue_top(pod_issues: dict[str, Any]) -> str:
items = pod_issues.get("items") if isinstance(pod_issues.get("items"), list) else []
if not items:
return ""
top = []
for item in items[:5]:
if not isinstance(item, dict):
continue
namespace = item.get("namespace")
pod = item.get("pod")
if not namespace or not pod:
continue
phase = item.get("phase") or ""
restarts = item.get("restarts") or 0
top.append(f"{namespace}/{pod}({phase},r={restarts})")
return "pod_issues_top: " + "; ".join(top) if top else ""
def _append_node_usage_stats(lines: list[str], summary: dict[str, Any]) -> None:
metrics = summary.get("metrics") if isinstance(summary.get("metrics"), dict) else {}
stats = metrics.get("node_usage_stats") if isinstance(metrics.get("node_usage_stats"), dict) else {}
@ -524,11 +634,14 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
return ""
lines: list[str] = []
_append_nodes(lines, summary)
_append_pressure(lines, summary)
_append_hardware(lines, summary)
_append_capacity(lines, summary)
_append_pods(lines, summary)
_append_namespace_pods(lines, summary)
_append_namespace_nodes(lines, summary)
_append_node_pods(lines, summary)
_append_pod_issues(lines, summary)
_append_node_usage_stats(lines, summary)
_append_namespace_usage(lines, summary)
_append_restarts(lines, summary)