"""Summarization helpers for cluster state: nodes, namespaces, workloads, PVCs, and warning events."""
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from .cluster_state_contract import *
|
|
|
|
def _vector_to_named(entries: list[dict[str, Any]], label_key: str, name_key: str) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
for item in entries:
|
|
if not isinstance(item, dict):
|
|
continue
|
|
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
|
value = item.get("value")
|
|
label = metric.get(label_key) if isinstance(metric, dict) else None
|
|
if not isinstance(label, str) or not label:
|
|
continue
|
|
output.append({name_key: label, "value": value, "metric": metric})
|
|
output.sort(key=lambda item: (-(item.get("value") or 0), item.get(name_key) or ""))
|
|
return output
|
|
|
|
|
|
def _pvc_top(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
for item in entries:
|
|
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
|
namespace = metric.get("namespace")
|
|
pvc = metric.get("persistentvolumeclaim")
|
|
if not isinstance(namespace, str) or not isinstance(pvc, str):
|
|
continue
|
|
output.append(
|
|
{
|
|
"namespace": namespace,
|
|
"pvc": pvc,
|
|
"used_percent": item.get("value"),
|
|
}
|
|
)
|
|
output.sort(key=lambda item: (-(item.get("used_percent") or 0), item.get("namespace") or ""))
|
|
return output
|
|
|
|
|
|
def _namespace_context(
    namespace_pods: list[dict[str, Any]],
    namespace_nodes: list[dict[str, Any]],
    namespace_capacity: list[dict[str, Any]],
    namespace_baseline: dict[str, dict[str, dict[str, float]]],
) -> list[dict[str, Any]]:
    """Join per-namespace pod counts with node placement, capacity, and baseline deltas.

    Args:
        namespace_pods: rows with pod phase counts per namespace; drives iteration,
            so namespaces absent here never appear in the output.
        namespace_nodes: rows carrying a ``"nodes"`` {node: pod_count} breakdown
            and a ``"primary_node"`` field.
        namespace_capacity: rows carrying cpu/mem usage, requests, and usage ratios.
        namespace_baseline: namespace -> {"cpu"/"mem" -> stats dict with "avg"}.

    Returns:
        One merged dict per namespace, sorted by pods_total desc, then namespace name.
    """
    # Index the placement and capacity rows by namespace for O(1) joins below.
    node_map = {entry.get("namespace"): entry for entry in namespace_nodes if isinstance(entry, dict)}
    cap_map = {entry.get("namespace"): entry for entry in namespace_capacity if isinstance(entry, dict)}
    output: list[dict[str, Any]] = []
    for entry in namespace_pods:
        if not isinstance(entry, dict):
            continue
        namespace = entry.get("namespace")
        if not isinstance(namespace, str) or not namespace:
            continue
        # Missing join partners degrade to empty dicts, so every .get below is safe.
        nodes_entry = node_map.get(namespace, {})
        cap_entry = cap_map.get(namespace, {})
        nodes = nodes_entry.get("nodes") if isinstance(nodes_entry.get("nodes"), dict) else {}
        top_nodes: list[dict[str, Any]] = []
        if isinstance(nodes, dict):
            # Up to three busiest nodes for this namespace; ties broken by node name.
            top_nodes = [
                {"node": name, "pods": count}
                for name, count in sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:3]
            ]
        baseline = namespace_baseline.get(namespace, {}) if isinstance(namespace_baseline, dict) else {}
        # Percent change of current usage vs the historical average; None values
        # (unusable baseline or missing usage) are filtered out of baseline_delta.
        delta_cpu = _baseline_delta(cap_entry.get("cpu_usage"), baseline.get("cpu", {}))
        delta_mem = _baseline_delta(cap_entry.get("mem_usage"), baseline.get("mem", {}))
        baseline_delta = {k: v for k, v in (("cpu", delta_cpu), ("mem", delta_mem)) if v is not None}
        output.append(
            {
                "namespace": namespace,
                "pods_total": entry.get("pods_total"),
                "pods_running": entry.get("pods_running"),
                "pods_pending": entry.get("pods_pending"),
                "pods_failed": entry.get("pods_failed"),
                "pods_succeeded": entry.get("pods_succeeded"),
                "primary_node": nodes_entry.get("primary_node"),
                "nodes_top": top_nodes,
                "cpu_usage": cap_entry.get("cpu_usage"),
                "cpu_requests": cap_entry.get("cpu_requests"),
                "cpu_ratio": cap_entry.get("cpu_usage_ratio"),
                "mem_usage": cap_entry.get("mem_usage"),
                "mem_requests": cap_entry.get("mem_requests"),
                "mem_ratio": cap_entry.get("mem_usage_ratio"),
                "baseline_delta": baseline_delta,
            }
        )
    output.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or ""))
    return output
|
|
|
|
|
|
def _namespace_nodes_top(namespace_context: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
for entry in namespace_context[:limit]:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
output.append(
|
|
{
|
|
"namespace": entry.get("namespace"),
|
|
"pods_total": entry.get("pods_total"),
|
|
"primary_node": entry.get("primary_node"),
|
|
"nodes_top": entry.get("nodes_top") or [],
|
|
}
|
|
)
|
|
return output
|
|
|
|
|
|
def _workload_nodes_top(workloads: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
entries = [w for w in workloads if isinstance(w, dict)]
|
|
entries.sort(
|
|
key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
|
|
)
|
|
for entry in entries[:limit]:
|
|
output.append(
|
|
{
|
|
"namespace": entry.get("namespace"),
|
|
"workload": entry.get("workload"),
|
|
"source": entry.get("source"),
|
|
"pods_total": entry.get("pods_total"),
|
|
"pods_running": entry.get("pods_running"),
|
|
"primary_node": entry.get("primary_node"),
|
|
}
|
|
)
|
|
return output
|
|
|
|
|
|
def _node_workload_map(workloads: list[dict[str, Any]]) -> dict[str, dict[str, int]]:
|
|
mapping: dict[str, dict[str, int]] = {}
|
|
for entry in workloads:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
namespace = entry.get("namespace")
|
|
workload = entry.get("workload")
|
|
if not isinstance(workload, str) or not workload:
|
|
continue
|
|
nodes = entry.get("nodes")
|
|
if not isinstance(nodes, dict):
|
|
continue
|
|
key = f"{namespace}/{workload}" if isinstance(namespace, str) and namespace else workload
|
|
for node, count in nodes.items():
|
|
if not isinstance(node, str) or not node:
|
|
continue
|
|
if not isinstance(count, int):
|
|
try:
|
|
count = int(count)
|
|
except (TypeError, ValueError):
|
|
continue
|
|
if count <= 0:
|
|
continue
|
|
mapping.setdefault(node, {})[key] = mapping.setdefault(node, {}).get(key, 0) + count
|
|
return mapping
|
|
|
|
|
|
def _node_workloads_top(
    workload_map: dict[str, dict[str, int]],
    limit_nodes: int = _NODE_WORKLOAD_LIMIT,
    limit_workloads: int = _NODE_WORKLOAD_TOP,
) -> list[dict[str, Any]]:
    """Rank nodes by total pod count, keeping only the top workloads on each."""
    ranked: list[dict[str, Any]] = []
    for node_name, per_workload in workload_map.items():
        if not (isinstance(node_name, str) and node_name and isinstance(per_workload, dict)):
            continue
        pods = sum(c for c in per_workload.values() if isinstance(c, int))
        # Busiest workloads first; ties broken by workload key.
        leaders = sorted(per_workload.items(), key=lambda kv: (-kv[1], kv[0]))[:limit_workloads]
        ranked.append({"node": node_name, "pods_total": pods, "workloads_top": leaders})
    ranked.sort(key=lambda row: (-(row.get("pods_total") or 0), row.get("node") or ""))
    return ranked[:limit_nodes]
|
|
|
|
|
|
def _workload_index(workloads: list[dict[str, Any]], limit: int = _WORKLOAD_INDEX_LIMIT) -> list[dict[str, Any]]:
    """Build a compact index of the busiest workloads with their top nodes.

    Rows are sorted by pod count desc, then namespace/workload for a stable order,
    and truncated to *limit* entries.
    """
    entries = [entry for entry in workloads if isinstance(entry, dict)]
    entries.sort(
        key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
    )
    output: list[dict[str, Any]] = []
    for entry in entries[:limit]:
        nodes = entry.get("nodes") if isinstance(entry.get("nodes"), dict) else {}
        # `nodes` is guaranteed a dict here; the original re-checked isinstance
        # and carried an unreachable `else []` branch, removed in this version.
        nodes_top = sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:_NODE_WORKLOAD_TOP]
        output.append(
            {
                "namespace": entry.get("namespace"),
                "workload": entry.get("workload"),
                "pods_total": entry.get("pods_total"),
                "pods_running": entry.get("pods_running"),
                "primary_node": entry.get("primary_node"),
                "nodes_top": nodes_top,
            }
        )
    return output
|
|
|
|
|
|
def _events_summary(events: dict[str, Any]) -> dict[str, Any]:
|
|
if not isinstance(events, dict):
|
|
return {}
|
|
by_namespace = events.get("warnings_by_namespace") if isinstance(events.get("warnings_by_namespace"), dict) else {}
|
|
top_namespace = ""
|
|
top_namespace_count = 0
|
|
if by_namespace:
|
|
top_namespace, top_namespace_count = sorted(
|
|
by_namespace.items(), key=lambda item: (-item[1], item[0])
|
|
)[0]
|
|
return {
|
|
"warnings_total": events.get("warnings_total"),
|
|
"top_reason": events.get("warnings_top_reason"),
|
|
"top_namespace": {"namespace": top_namespace, "count": top_namespace_count},
|
|
"latest": events.get("warnings_latest"),
|
|
"recent": (events.get("warnings_recent") or [])[:_EVENTS_SUMMARY_LIMIT],
|
|
}
|
|
|
|
|
|
def _build_lexicon() -> dict[str, Any]:
|
|
terms = [
|
|
{
|
|
"term": "hottest",
|
|
"meaning": "highest utilization for a metric (cpu, ram, net, io, load_index).",
|
|
},
|
|
{
|
|
"term": "pressure",
|
|
"meaning": "node condition flags (MemoryPressure, DiskPressure, PIDPressure, NetworkUnavailable).",
|
|
},
|
|
{
|
|
"term": "load_index",
|
|
"meaning": "composite load score derived from cpu, ram, net, io.",
|
|
},
|
|
{"term": "top", "meaning": "highest values within a category."},
|
|
{"term": "pods", "meaning": "running workload instances on a node or namespace."},
|
|
{"term": "workload", "meaning": "deployment/statefulset/daemonset grouping."},
|
|
]
|
|
aliases = {
|
|
"hot node": "node with highest load_index",
|
|
"hottest by cpu": "node with highest cpu utilization",
|
|
"hottest by ram": "node with highest ram utilization",
|
|
"pressure node": "node with pressure condition flags",
|
|
}
|
|
return {"terms": terms, "aliases": aliases}
|
|
|
|
|
|
def _top_named_entries(
|
|
entries: list[dict[str, Any]],
|
|
name_key: str,
|
|
limit: int,
|
|
) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
for entry in entries or []:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
name = entry.get(name_key)
|
|
if not isinstance(name, str) or not name:
|
|
continue
|
|
value = entry.get("value")
|
|
try:
|
|
numeric = float(value)
|
|
except (TypeError, ValueError):
|
|
numeric = 0.0
|
|
output.append({"name": name, "value": numeric})
|
|
output.sort(key=lambda item: -(item.get("value") or 0))
|
|
return output[:limit]
|
|
|
|
|
|
def _cross_node_metric_top(metrics: dict[str, Any], node_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """For each node metric series, emit its top nodes enriched with node context.

    Reads ``metrics["node_usage"][metric]`` for metric in cpu/ram/net/io/disk,
    takes the top ``_CROSS_NODE_TOP`` entries per series, and joins each with the
    matching row from *node_context* (keyed by node name).
    """
    usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
    # Index the node context rows by node name for the join below.
    node_map = {entry.get("node"): entry for entry in node_context if isinstance(entry, dict)}
    output: list[dict[str, Any]] = []
    for metric in ("cpu", "ram", "net", "io", "disk"):
        series = usage.get(metric)
        if not isinstance(series, list):
            # Series absent or malformed — skip this metric entirely.
            continue
        for top in _top_named_entries(series, "node", _CROSS_NODE_TOP):
            node = top.get("name")
            if not node:
                continue
            # Unknown nodes degrade to an empty context: all joined fields become None.
            context = node_map.get(node, {})
            output.append(
                {
                    "metric": metric,
                    "node": node,
                    "value": top.get("value"),
                    "cpu": context.get("cpu"),
                    "ram": context.get("ram"),
                    "net": context.get("net"),
                    "io": context.get("io"),
                    "disk": context.get("disk"),
                    "load_index": context.get("load_index"),
                    "pods_total": context.get("pods_total"),
                    "hardware": context.get("hardware"),
                    "roles": context.get("roles"),
                    "pressure_flags": context.get("pressure_flags"),
                }
            )
    return output
|
|
|
|
|
|
def _cross_namespace_metric_top(
    metrics: dict[str, Any],
    namespace_context: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Enrich per-metric namespace leaders with context from the namespace summaries."""
    raw_top = metrics.get("namespace_top")
    top = raw_top if isinstance(raw_top, dict) else {}
    # Index namespace context rows by name, skipping malformed or unnamed rows.
    namespace_map = {
        row.get("namespace"): row
        for row in namespace_context
        if isinstance(row, dict) and row.get("namespace")
    }
    enriched: list[dict[str, Any]] = []
    for metric_name in ("cpu", "mem", "net", "io", "restarts"):
        series = top.get(metric_name)
        if not isinstance(series, list):
            continue
        for leader in _top_named_entries(series, "namespace", _CROSS_NAMESPACE_TOP):
            ns = leader.get("name")
            if not ns:
                continue
            ctx = namespace_map.get(ns, {})
            enriched.append(
                {
                    "metric": metric_name,
                    "namespace": ns,
                    "value": leader.get("value"),
                    "pods_total": ctx.get("pods_total"),
                    "pods_running": ctx.get("pods_running"),
                    "cpu_ratio": ctx.get("cpu_ratio"),
                    "mem_ratio": ctx.get("mem_ratio"),
                    "primary_node": ctx.get("primary_node"),
                    "nodes_top": ctx.get("nodes_top") or [],
                }
            )
    return enriched
|
|
|
|
|
|
def _build_cross_stats(
    metrics: dict[str, Any],
    node_context: list[dict[str, Any]],
    namespace_context: list[dict[str, Any]],
    workloads: list[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble the cross-cutting 'top N' views used by the summary payload."""
    stats: dict[str, Any] = {}
    stats["node_metric_top"] = _cross_node_metric_top(metrics, node_context)
    stats["namespace_metric_top"] = _cross_namespace_metric_top(metrics, namespace_context)
    stats["pvc_top"] = _pvc_top(metrics.get("pvc_usage_top", []))[:_CROSS_PVC_TOP]
    stats["workload_top"] = _workload_nodes_top(workloads, _CROSS_NAMESPACE_TOP)
    return stats
|
|
|
|
|
|
def _node_context(
    node_details: list[dict[str, Any]],
    node_load: list[dict[str, Any]],
    node_baseline: dict[str, dict[str, dict[str, float]]],
    node_workloads: dict[str, dict[str, int]],
) -> list[dict[str, Any]]:
    """Join node inventory details with load, baseline deltas, and top workloads.

    Args:
        node_details: inventory rows (roles, hardware, taints, pressure flags);
            drives iteration, so nodes absent here never appear in the output.
        node_load: rows with per-node cpu/ram/disk/net/io, pods_total, load_index.
        node_baseline: node -> metric -> stats dict (with "avg") historical stats.
        node_workloads: node -> {"ns/workload": pod_count}, as built by
            _node_workload_map.

    Returns:
        One merged dict per node, sorted by load_index desc, then node name.
    """
    # Index load rows by node name for O(1) joins below.
    load_map = {entry.get("node"): entry for entry in node_load if isinstance(entry, dict)}
    output: list[dict[str, Any]] = []
    for entry in node_details:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        if not isinstance(name, str) or not name:
            continue
        # Missing join partners degrade to empty dicts, so every .get below is safe.
        load_entry = load_map.get(name, {})
        baseline = node_baseline.get(name, {}) if isinstance(node_baseline, dict) else {}
        deltas: dict[str, float] = {}
        for key in ("cpu", "ram", "net", "io", "disk"):
            current = load_entry.get(key)
            stats = baseline.get(key, {}) if isinstance(baseline, dict) else {}
            # Percent change vs the historical average; None means "no usable baseline".
            delta = _baseline_delta(current, stats)
            if delta is not None:
                deltas[key] = delta
        workloads = node_workloads.get(name, {}) if isinstance(node_workloads, dict) else {}
        # Busiest workloads on this node; ties broken by workload key.
        workloads_top = sorted(workloads.items(), key=lambda item: (-item[1], item[0]))[:_NODE_WORKLOAD_TOP]
        output.append(
            {
                "node": name,
                "ready": entry.get("ready"),
                "roles": entry.get("roles"),
                "is_worker": entry.get("is_worker"),
                "hardware": entry.get("hardware"),
                "arch": entry.get("arch"),
                "os": entry.get("os"),
                "taints": entry.get("taints"),
                "unschedulable": entry.get("unschedulable"),
                # Detail rows call this "pressure"; the summary exposes it as
                # "pressure_flags".
                "pressure_flags": entry.get("pressure"),
                "pods_total": load_entry.get("pods_total"),
                "cpu": load_entry.get("cpu"),
                "ram": load_entry.get("ram"),
                "disk": load_entry.get("disk"),
                "net": load_entry.get("net"),
                "io": load_entry.get("io"),
                "load_index": load_entry.get("load_index"),
                "baseline": baseline,
                "baseline_delta": deltas,
                "workloads_top": workloads_top,
            }
        )
    output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
    return output
|
|
|
|
|
|
def _baseline_delta(current: Any, stats: dict[str, Any]) -> float | None:
|
|
if not isinstance(current, (int, float)):
|
|
return None
|
|
avg = stats.get("avg")
|
|
if not isinstance(avg, (int, float)) or avg == 0:
|
|
return None
|
|
return round(((float(current) - float(avg)) / float(avg)) * 100, 2)
|
|
|
|
|
|
def _delta_severity(delta: float) -> str:
    """Classify a baseline delta's magnitude into info / warning / critical."""
    size = abs(delta)
    if size >= _BASELINE_DELTA_CRIT:
        return "critical"
    return "warning" if size >= _BASELINE_DELTA_WARN else "info"
|
|
|
|
|
|
def _delta_entry_label(entry: dict[str, Any]) -> tuple[str, str]:
|
|
if "node" in entry:
|
|
return ("node", str(entry.get("node") or ""))
|
|
return ("namespace", str(entry.get("namespace") or ""))
|
|
|
|
|
|
def _delta_top(entries: list[dict[str, Any]], key: str, limit: int = _DELTA_TOP_LIMIT) -> list[dict[str, Any]]:
    """Largest absolute baseline deltas for metric *key*, labeled and severity-tagged."""
    rows: list[dict[str, Any]] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        raw = entry.get("baseline_delta")
        deltas = raw if isinstance(raw, dict) else {}
        delta = deltas.get(key)
        if not isinstance(delta, (int, float)):
            continue
        label_key, label_value = _delta_entry_label(entry)
        rows.append(
            {
                label_key: label_value,
                "metric": key,
                "delta": delta,
                "severity": _delta_severity(float(delta)),
            }
        )
    # Biggest movers first, regardless of sign; ties broken by metric name.
    rows.sort(key=lambda row: (-(abs(row.get("delta") or 0)), row.get("metric") or ""))
    return rows[:limit]
|
|
|
|
|
|
def _reason_top(counts: dict[str, Any], limit: int = _REASON_TOP_LIMIT) -> list[dict[str, Any]]:
    """Most frequent event reasons as {reason, count} rows, highest count first."""
    items = counts.items() if isinstance(counts, dict) else []
    rows = [
        {"reason": reason, "count": int(value)}
        for reason, value in items
        if isinstance(reason, str) and reason and isinstance(value, (int, float))
    ]
    rows.sort(key=lambda row: (-row.get("count", 0), row.get("reason") or ""))
    return rows[:limit]
|
|
|
|
# Re-export every single-underscore helper defined above, plus the two contract
# names (presumably supplied by the cluster_state_contract star import — verify),
# so sibling modules can star-import these helpers despite the leading underscores.
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|