cluster-state: add baseline deltas and relationships
This commit is contained in:
parent
9276d2538a
commit
8446c1f032
@ -1948,6 +1948,7 @@ def _namespace_context(
|
|||||||
namespace_pods: list[dict[str, Any]],
|
namespace_pods: list[dict[str, Any]],
|
||||||
namespace_nodes: list[dict[str, Any]],
|
namespace_nodes: list[dict[str, Any]],
|
||||||
namespace_capacity: list[dict[str, Any]],
|
namespace_capacity: list[dict[str, Any]],
|
||||||
|
namespace_baseline: dict[str, dict[str, dict[str, float]]],
|
||||||
) -> list[dict[str, Any]]:
|
) -> list[dict[str, Any]]:
|
||||||
node_map = {entry.get("namespace"): entry for entry in namespace_nodes if isinstance(entry, dict)}
|
node_map = {entry.get("namespace"): entry for entry in namespace_nodes if isinstance(entry, dict)}
|
||||||
cap_map = {entry.get("namespace"): entry for entry in namespace_capacity if isinstance(entry, dict)}
|
cap_map = {entry.get("namespace"): entry for entry in namespace_capacity if isinstance(entry, dict)}
|
||||||
@ -1967,6 +1968,10 @@ def _namespace_context(
|
|||||||
{"node": name, "pods": count}
|
{"node": name, "pods": count}
|
||||||
for name, count in sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:3]
|
for name, count in sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||||
]
|
]
|
||||||
|
baseline = namespace_baseline.get(namespace, {}) if isinstance(namespace_baseline, dict) else {}
|
||||||
|
delta_cpu = _baseline_delta(cap_entry.get("cpu_usage"), baseline.get("cpu", {}))
|
||||||
|
delta_mem = _baseline_delta(cap_entry.get("mem_usage"), baseline.get("mem", {}))
|
||||||
|
baseline_delta = {k: v for k, v in (("cpu", delta_cpu), ("mem", delta_mem)) if v is not None}
|
||||||
output.append(
|
output.append(
|
||||||
{
|
{
|
||||||
"namespace": namespace,
|
"namespace": namespace,
|
||||||
@ -1983,12 +1988,49 @@ def _namespace_context(
|
|||||||
"mem_usage": cap_entry.get("mem_usage"),
|
"mem_usage": cap_entry.get("mem_usage"),
|
||||||
"mem_requests": cap_entry.get("mem_requests"),
|
"mem_requests": cap_entry.get("mem_requests"),
|
||||||
"mem_ratio": cap_entry.get("mem_usage_ratio"),
|
"mem_ratio": cap_entry.get("mem_usage_ratio"),
|
||||||
|
"baseline_delta": baseline_delta,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
output.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or ""))
|
output.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or ""))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _namespace_nodes_top(namespace_context: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
||||||
|
output: list[dict[str, Any]] = []
|
||||||
|
for entry in namespace_context[:limit]:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
output.append(
|
||||||
|
{
|
||||||
|
"namespace": entry.get("namespace"),
|
||||||
|
"pods_total": entry.get("pods_total"),
|
||||||
|
"primary_node": entry.get("primary_node"),
|
||||||
|
"nodes_top": entry.get("nodes_top") or [],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _workload_nodes_top(workloads: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
||||||
|
output: list[dict[str, Any]] = []
|
||||||
|
entries = [w for w in workloads if isinstance(w, dict)]
|
||||||
|
entries.sort(
|
||||||
|
key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
|
||||||
|
)
|
||||||
|
for entry in entries[:limit]:
|
||||||
|
output.append(
|
||||||
|
{
|
||||||
|
"namespace": entry.get("namespace"),
|
||||||
|
"workload": entry.get("workload"),
|
||||||
|
"source": entry.get("source"),
|
||||||
|
"pods_total": entry.get("pods_total"),
|
||||||
|
"pods_running": entry.get("pods_running"),
|
||||||
|
"primary_node": entry.get("primary_node"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
|
||||||
def _node_context(
|
def _node_context(
|
||||||
node_details: list[dict[str, Any]],
|
node_details: list[dict[str, Any]],
|
||||||
node_load: list[dict[str, Any]],
|
node_load: list[dict[str, Any]],
|
||||||
@ -2004,6 +2046,13 @@ def _node_context(
|
|||||||
continue
|
continue
|
||||||
load_entry = load_map.get(name, {})
|
load_entry = load_map.get(name, {})
|
||||||
baseline = node_baseline.get(name, {}) if isinstance(node_baseline, dict) else {}
|
baseline = node_baseline.get(name, {}) if isinstance(node_baseline, dict) else {}
|
||||||
|
deltas: dict[str, float] = {}
|
||||||
|
for key in ("cpu", "ram", "net", "io", "disk"):
|
||||||
|
current = load_entry.get(key)
|
||||||
|
stats = baseline.get(key, {}) if isinstance(baseline, dict) else {}
|
||||||
|
delta = _baseline_delta(current, stats)
|
||||||
|
if delta is not None:
|
||||||
|
deltas[key] = delta
|
||||||
output.append(
|
output.append(
|
||||||
{
|
{
|
||||||
"node": name,
|
"node": name,
|
||||||
@ -2024,12 +2073,22 @@ def _node_context(
|
|||||||
"io": load_entry.get("io"),
|
"io": load_entry.get("io"),
|
||||||
"load_index": load_entry.get("load_index"),
|
"load_index": load_entry.get("load_index"),
|
||||||
"baseline": baseline,
|
"baseline": baseline,
|
||||||
|
"baseline_delta": deltas,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
|
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _baseline_delta(current: Any, stats: dict[str, Any]) -> float | None:
|
||||||
|
if not isinstance(current, (int, float)):
|
||||||
|
return None
|
||||||
|
avg = stats.get("avg")
|
||||||
|
if not isinstance(avg, (int, float)) or avg == 0:
|
||||||
|
return None
|
||||||
|
return round(((float(current) - float(avg)) / float(avg)) * 100, 2)
|
||||||
|
|
||||||
|
|
||||||
def _build_anomalies(
|
def _build_anomalies(
|
||||||
metrics: dict[str, Any],
|
metrics: dict[str, Any],
|
||||||
nodes_summary: dict[str, Any],
|
nodes_summary: dict[str, Any],
|
||||||
@ -2401,6 +2460,7 @@ def collect_cluster_state() -> tuple[dict[str, Any], ClusterStateSummary]:
|
|||||||
namespace_pods,
|
namespace_pods,
|
||||||
namespace_nodes,
|
namespace_nodes,
|
||||||
metrics.get("namespace_capacity", []),
|
metrics.get("namespace_capacity", []),
|
||||||
|
metrics.get("namespace_baseline_map", {}),
|
||||||
)
|
)
|
||||||
node_context = _node_context(
|
node_context = _node_context(
|
||||||
node_details,
|
node_details,
|
||||||
@ -2431,6 +2491,11 @@ def collect_cluster_state() -> tuple[dict[str, Any], ClusterStateSummary]:
|
|||||||
"workload_not_ready": _workload_not_ready_items(workload_health)[:5],
|
"workload_not_ready": _workload_not_ready_items(workload_health)[:5],
|
||||||
"pod_restarts": _pod_restarts_top(metrics),
|
"pod_restarts": _pod_restarts_top(metrics),
|
||||||
},
|
},
|
||||||
|
"relationships": {
|
||||||
|
"namespace_nodes": _namespace_nodes_top(namespace_context, 5),
|
||||||
|
"node_namespaces": metrics.get("node_pods_top", []),
|
||||||
|
"workload_nodes": _workload_nodes_top(workloads, 5),
|
||||||
|
},
|
||||||
"attention_ranked": _build_attention_ranked(metrics, node_context, pod_issues, workload_health),
|
"attention_ranked": _build_attention_ranked(metrics, node_context, pod_issues, workload_health),
|
||||||
"anomalies": anomalies,
|
"anomalies": anomalies,
|
||||||
"health_bullets": _health_bullets(metrics, node_summary, workload_health, anomalies),
|
"health_bullets": _health_bullets(metrics, node_summary, workload_health, anomalies),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user