diff --git a/atlasbot/snapshot/builder.py b/atlasbot/snapshot/builder.py index 7819d54..a6edddb 100644 --- a/atlasbot/snapshot/builder.py +++ b/atlasbot/snapshot/builder.py @@ -1258,6 +1258,25 @@ def _append_flux(lines: list[str], summary: dict[str, Any]) -> None: not_ready = flux.get("not_ready") if not_ready is not None: lines.append(f"flux_not_ready: {not_ready}") + items = flux.get("items") + if isinstance(items, list) and items: + parts = [] + for item in items[:10]: + if not isinstance(item, dict): + continue + name = item.get("name") or "" + namespace = item.get("namespace") or "" + reason = item.get("reason") or "" + suspended = item.get("suspended") + label = f"{namespace}/{name}".strip("/") + if reason: + label = f"{label} ({reason})" + if suspended: + label = f"{label} [suspended]" + if label: + parts.append(label) + if parts: + lines.append("flux_not_ready_items: " + "; ".join(parts)) def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None: @@ -1274,6 +1293,128 @@ def _append_units_windows(lines: list[str], summary: dict[str, Any]) -> None: lines.append("windows: rates=5m, restarts=1h") +def _append_node_load_summary(lines: list[str], summary: dict[str, Any]) -> None: + node_load = summary.get("node_load_summary") + if not isinstance(node_load, dict) or not node_load: + return + top = node_load.get("top") + if isinstance(top, list) and top: + parts = [] + for entry in top[:5]: + if not isinstance(entry, dict): + continue + node = entry.get("node") or "" + load = entry.get("load_index") + cpu = entry.get("cpu") + ram = entry.get("ram") + io = entry.get("io") + net = entry.get("net") + pods_total = entry.get("pods_total") + label = f"{node} idx={_format_float(load)}" + if isinstance(pods_total, (int, float)): + label += f" pods={int(pods_total)}" + label += f" cpu={_format_float(cpu)} ram={_format_float(ram)}" + label += f" io={_format_rate_bytes(io)} net={_format_rate_bytes(net)}" + parts.append(label) + if parts: + lines.append("node_load_top: " + "; ".join(parts)) + outliers = node_load.get("outliers") + if isinstance(outliers, list) and outliers: + names = [entry.get("node") for entry in outliers if isinstance(entry, dict)] + names = [name for name in names if isinstance(name, str) and name] + if names: + lines.append("node_load_outliers: " + _format_names(names)) + + +def _capacity_ratio_parts(entries: list[dict[str, Any]], ratio_key: str, usage_key: str, req_key: str) -> list[str]: + parts: list[str] = [] + for entry in entries[:5]: + if not isinstance(entry, dict): + continue + ns = entry.get("namespace") or "" + ratio = entry.get(ratio_key) + usage = entry.get(usage_key) + req = entry.get(req_key) + if ns: + parts.append( + f"{ns}={_format_float(ratio)} (usage={_format_float(usage)} req={_format_float(req)})" + ) + return parts + + +def _capacity_headroom_parts(entries: list[dict[str, Any]]) -> list[str]: + parts: list[str] = [] + for entry in entries[:5]: + if not isinstance(entry, dict): + continue + ns = entry.get("namespace") or "" + headroom = entry.get("headroom") + if ns: + parts.append(f"{ns}={_format_float(headroom)}") + return parts + + +def _append_namespace_capacity_summary(lines: list[str], summary: dict[str, Any]) -> None: # noqa: C901 + cap = summary.get("namespace_capacity_summary") + if not isinstance(cap, dict) or not cap: + return + cpu_ratio = cap.get("cpu_ratio_top") + if isinstance(cpu_ratio, list): + parts = _capacity_ratio_parts(cpu_ratio, "cpu_usage_ratio", "cpu_usage", "cpu_requests") + if parts: + lines.append("namespace_cpu_ratio_top: " + "; ".join(parts)) + mem_ratio = cap.get("mem_ratio_top") + if isinstance(mem_ratio, list): + parts = _capacity_ratio_parts(mem_ratio, "mem_usage_ratio", "mem_usage", "mem_requests") + if parts: + lines.append("namespace_mem_ratio_top: " + "; ".join(parts)) + cpu_headroom = cap.get("cpu_headroom_low") + if isinstance(cpu_headroom, list): + parts = _capacity_headroom_parts(cpu_headroom) + if parts: + lines.append("namespace_cpu_headroom_low: " + "; ".join(parts)) + mem_headroom = cap.get("mem_headroom_low") + if isinstance(mem_headroom, list): + parts = _capacity_headroom_parts(mem_headroom) + if parts: + lines.append("namespace_mem_headroom_low: " + "; ".join(parts)) + cpu_over = cap.get("cpu_overcommitted") + mem_over = cap.get("mem_overcommitted") + if cpu_over is not None or mem_over is not None: + lines.append(f"namespace_overcommitted: cpu={cpu_over} mem={mem_over}") + + +def _append_workloads_by_namespace(lines: list[str], summary: dict[str, Any]) -> None: + workloads = summary.get("workloads") + if not isinstance(workloads, list) or not workloads: + return + by_ns: dict[str, list[dict[str, Any]]] = {} + for item in workloads: + if not isinstance(item, dict): + continue + ns = item.get("namespace") or "" + name = item.get("workload") or "" + if not ns or not name: + continue + by_ns.setdefault(ns, []).append(item) + for ns, items in sorted(by_ns.items()): + items.sort( + key=lambda item: (-int(item.get("pods_total") or 0), item.get("workload") or "") + ) + parts = [] + for entry in items[:2]: + name = entry.get("workload") or "" + pods = entry.get("pods_total") + primary = entry.get("primary_node") + label = f"{name}({pods})" if pods is not None else name + if primary: + label = f"{label}@{primary}" + if label: + parts.append(label) + if parts: + lines.append(f"workloads_top_{ns}: " + "; ".join(parts)) + + def summary_text(snapshot: dict[str, Any] | None) -> str: summary = build_summary(snapshot) if not summary: @@ -1305,8 +1446,11 @@ def summary_text(snapshot: dict[str, Any] | None) -> str: _append_hottest(lines, summary) _append_pvc_usage(lines, summary) _append_root_disk_headroom(lines, summary) + _append_namespace_capacity_summary(lines, summary) _append_longhorn(lines, summary) _append_workloads(lines, summary) + _append_workloads_by_namespace(lines, summary) + _append_node_load_summary(lines, summary) _append_flux(lines, summary) _append_units_windows(lines, summary) return "\n".join(lines)