atlasbot: enrich snapshot summary context
This commit is contained in:
parent
2641ffe9ca
commit
e82cca7cd5
@ -62,6 +62,10 @@ def build_summary(snapshot: dict[str, Any] | None) -> dict[str, Any]:
|
||||
metrics = _metrics(snapshot)
|
||||
summary: dict[str, Any] = {}
|
||||
|
||||
if isinstance(snapshot.get("nodes_summary"), dict):
|
||||
summary["nodes_summary"] = snapshot.get("nodes_summary")
|
||||
if metrics:
|
||||
summary["metrics"] = metrics
|
||||
summary.update(_build_nodes(snapshot))
|
||||
summary.update(_build_hardware(nodes_detail))
|
||||
summary.update(_build_pods(metrics))
|
||||
@ -164,6 +168,25 @@ def _format_float(value: Any) -> str:
|
||||
return f"{numeric:.2f}".rstrip("0").rstrip(".")
|
||||
|
||||
|
||||
def _format_rate_bytes(value: Any) -> str:
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
return str(value)
|
||||
if numeric >= 1024 * 1024:
|
||||
return f"{numeric / (1024 * 1024):.2f} MB/s"
|
||||
if numeric >= 1024:
|
||||
return f"{numeric / 1024:.2f} KB/s"
|
||||
return f"{numeric:.2f} B/s"
|
||||
|
||||
|
||||
def _format_kv_map(values: dict[str, Any]) -> str:
|
||||
parts = []
|
||||
for key, value in values.items():
|
||||
parts.append(f"{key}={value}")
|
||||
return ", ".join(parts)
|
||||
|
||||
|
||||
def _format_names(names: list[str]) -> str:
|
||||
if not names:
|
||||
return ""
|
||||
@ -178,13 +201,32 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
|
||||
nodes = summary.get("nodes") if isinstance(summary.get("nodes"), dict) else {}
|
||||
if nodes:
|
||||
workers = {}
|
||||
if isinstance(summary.get("nodes_summary"), dict):
|
||||
workers = summary["nodes_summary"].get("workers") or {}
|
||||
workers_total = workers.get("total")
|
||||
workers_ready = workers.get("ready")
|
||||
workers_str = ""
|
||||
if workers_total is not None and workers_ready is not None:
|
||||
workers_str = f", workers_ready={workers_ready}/{workers_total}"
|
||||
lines.append(
|
||||
"nodes: total={total}, ready={ready}, not_ready={not_ready}".format(
|
||||
"nodes: total={total}, ready={ready}, not_ready={not_ready}{workers}".format(
|
||||
total=nodes.get("total"),
|
||||
ready=nodes.get("ready"),
|
||||
not_ready=nodes.get("not_ready"),
|
||||
workers=workers_str,
|
||||
)
|
||||
)
|
||||
if isinstance(summary.get("nodes_summary"), dict):
|
||||
not_ready_names = summary["nodes_summary"].get("not_ready_names") or []
|
||||
if not_ready_names:
|
||||
lines.append("nodes_not_ready: " + _format_names(not_ready_names))
|
||||
by_arch = summary["nodes_summary"].get("by_arch") or {}
|
||||
if isinstance(by_arch, dict) and by_arch:
|
||||
lines.append("archs: " + _format_kv_map(by_arch))
|
||||
by_role = summary["nodes_summary"].get("by_role") or {}
|
||||
if isinstance(by_role, dict) and by_role:
|
||||
lines.append("roles: " + _format_kv_map(by_role))
|
||||
|
||||
hardware = summary.get("hardware") if isinstance(summary.get("hardware"), dict) else {}
|
||||
if hardware:
|
||||
@ -210,6 +252,22 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
succeeded=pods.get("succeeded"),
|
||||
)
|
||||
)
|
||||
if isinstance(summary.get("metrics"), dict):
|
||||
top_restarts = summary["metrics"].get("top_restarts_1h") or []
|
||||
if isinstance(top_restarts, list) and top_restarts:
|
||||
parts = []
|
||||
for entry in top_restarts:
|
||||
metric = entry.get("metric") if isinstance(entry, dict) else {}
|
||||
value = entry.get("value") if isinstance(entry, dict) else []
|
||||
if not isinstance(metric, dict) or not isinstance(value, list) or len(value) < 2:
|
||||
continue
|
||||
namespace = metric.get("namespace")
|
||||
pod = metric.get("pod")
|
||||
count = _format_float(value[1])
|
||||
if namespace and pod:
|
||||
parts.append(f"{namespace}/{pod}={count}")
|
||||
if parts:
|
||||
lines.append("restarts_1h_top: " + "; ".join(parts))
|
||||
|
||||
postgres = summary.get("postgres") if isinstance(summary.get("postgres"), dict) else {}
|
||||
if postgres:
|
||||
@ -229,7 +287,10 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
value = _format_float(entry.get("value"))
|
||||
if key in {"net", "io"}:
|
||||
value = _format_rate_bytes(entry.get("value"))
|
||||
else:
|
||||
value = _format_float(entry.get("value"))
|
||||
if node:
|
||||
parts.append(f"{key}={node} ({value})")
|
||||
if parts:
|
||||
@ -238,6 +299,24 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
workloads = summary.get("workloads")
|
||||
if isinstance(workloads, list) and workloads:
|
||||
lines.append(f"workloads: total={len(workloads)}")
|
||||
top_workloads = sorted(
|
||||
(item for item in workloads if isinstance(item, dict)),
|
||||
key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or ""),
|
||||
)[:5]
|
||||
if top_workloads:
|
||||
parts = []
|
||||
for item in top_workloads:
|
||||
namespace = item.get("namespace")
|
||||
name = item.get("workload")
|
||||
pods_total = item.get("pods_total")
|
||||
primary = item.get("primary_node")
|
||||
if namespace and name:
|
||||
label = f"{namespace}/{name}={pods_total}"
|
||||
if primary:
|
||||
label = f"{label} (primary={primary})"
|
||||
parts.append(label)
|
||||
if parts:
|
||||
lines.append("workloads_top: " + "; ".join(parts))
|
||||
|
||||
flux = summary.get("flux") if isinstance(summary.get("flux"), dict) else {}
|
||||
if flux:
|
||||
@ -245,4 +324,7 @@ def summary_text(snapshot: dict[str, Any] | None) -> str:
|
||||
if not_ready is not None:
|
||||
lines.append(f"flux_not_ready: {not_ready}")
|
||||
|
||||
lines.append("units: cpu_pct, ram_pct, net=bytes_per_sec, io=bytes_per_sec")
|
||||
lines.append("windows: rates=5m, restarts=1h")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user