402 lines
15 KiB
Python
402 lines
15 KiB
Python
from __future__ import annotations
|
|
|
|
from datetime import datetime, timezone
|
|
from typing import Any
|
|
|
|
from .cluster_state_contract import *
|
|
|
|
def _node_usage_by_hardware(node_load: list[dict[str, Any]], node_details: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
if not node_load or not node_details:
|
|
return []
|
|
hardware_by_node = _hardware_map(node_details)
|
|
buckets: dict[str, dict[str, list[float]]] = {}
|
|
for entry in node_load:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
node = entry.get("node")
|
|
if not isinstance(node, str) or not node:
|
|
continue
|
|
hardware = hardware_by_node.get(node, "unknown")
|
|
_append_hardware_usage(buckets, str(hardware), entry)
|
|
return _finalize_hardware_usage(buckets)
|
|
|
|
|
|
def _hardware_map(node_details: list[dict[str, Any]]) -> dict[str, str]:
|
|
mapping: dict[str, str] = {}
|
|
for node in node_details:
|
|
if not isinstance(node, dict):
|
|
continue
|
|
name = node.get("name")
|
|
if isinstance(name, str) and name:
|
|
mapping[name] = str(node.get("hardware") or "unknown")
|
|
return mapping
|
|
|
|
|
|
def _append_hardware_usage(buckets: dict[str, dict[str, list[float]]], hardware: str, entry: dict[str, Any]) -> None:
|
|
bucket = buckets.setdefault(hardware, {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
|
|
for key in ("load_index", "cpu", "ram", "net", "io"):
|
|
value = entry.get(key)
|
|
if isinstance(value, (int, float)):
|
|
bucket[key].append(float(value))
|
|
|
|
|
|
def _finalize_hardware_usage(buckets: dict[str, dict[str, list[float]]]) -> list[dict[str, Any]]:
|
|
output: list[dict[str, Any]] = []
|
|
for hardware, metrics in buckets.items():
|
|
row: dict[str, Any] = {"hardware": hardware}
|
|
for key, values in metrics.items():
|
|
if values:
|
|
row[key] = sum(values) / len(values)
|
|
output.append(row)
|
|
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
|
|
return output
|
|
|
|
def _node_ready(conditions: Any) -> bool:
|
|
if not isinstance(conditions, list):
|
|
return False
|
|
for condition in conditions:
|
|
if not isinstance(condition, dict):
|
|
continue
|
|
if condition.get("type") == "Ready":
|
|
return condition.get("status") == "True"
|
|
return False
|
|
|
|
|
|
def _summarize_nodes(payload: dict[str, Any]) -> dict[str, Any]:
    """Count nodes in ``payload`` and list the ones that are not ready.

    Nodes come from ``_items(payload)``. A node without a non-empty string
    ``metadata.name`` is ignored entirely.

    Returns:
        A dict with ``total``, ``ready`` and ``not_ready`` counts plus the
        sorted ``names`` and ``not_ready_names`` lists.
    """
    all_names: list[str] = []
    unready_names: list[str] = []
    for item in _items(payload):
        meta = item.get("metadata")
        node_name = meta.get("name") if isinstance(meta, dict) else None
        if not isinstance(node_name, str) or not node_name:
            continue
        all_names.append(node_name)

        node_status = item.get("status")
        node_conditions = node_status.get("conditions") if isinstance(node_status, dict) else None
        if not _node_ready(node_conditions):
            unready_names.append(node_name)

    all_names.sort()
    unready_names.sort()
    return {
        "total": len(all_names),
        "ready": len(all_names) - len(unready_names),
        "not_ready": len(unready_names),
        "names": all_names,
        "not_ready_names": unready_names,
    }
|
|
|
|
|
|
def _node_labels(labels: dict[str, Any]) -> dict[str, Any]:
|
|
if not isinstance(labels, dict):
|
|
return {}
|
|
keep: dict[str, Any] = {}
|
|
for key, value in labels.items():
|
|
if key.startswith("node-role.kubernetes.io/"):
|
|
keep[key] = value
|
|
if key in {
|
|
"kubernetes.io/arch",
|
|
"kubernetes.io/hostname",
|
|
"beta.kubernetes.io/arch",
|
|
"hardware",
|
|
"jetson",
|
|
}:
|
|
keep[key] = value
|
|
return keep
|
|
|
|
|
|
def _node_addresses(status: dict[str, Any]) -> dict[str, str]:
|
|
addresses = status.get("addresses") if isinstance(status.get("addresses"), list) else []
|
|
output: dict[str, str] = {}
|
|
for addr in addresses:
|
|
if not isinstance(addr, dict):
|
|
continue
|
|
addr_type = addr.get("type")
|
|
addr_value = addr.get("address")
|
|
if isinstance(addr_type, str) and isinstance(addr_value, str):
|
|
output[addr_type] = addr_value
|
|
return output
|
|
|
|
|
|
def _node_details(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Build one flat detail row per named node in ``payload``.

    Nodes come from ``_items(payload)``. A node without a non-empty string
    ``metadata.name`` is skipped. Any of ``metadata``, ``spec``, ``status``,
    ``status.nodeInfo`` or ``metadata.labels`` that is not a dict is treated
    as empty.

    Returns:
        Detail rows sorted by node name.
    """

    def _dict_field(container: dict[str, Any], key: str) -> dict[str, Any]:
        # Treat a missing or non-dict section as empty.
        candidate = container.get(key)
        return candidate if isinstance(candidate, dict) else {}

    rows: list[dict[str, Any]] = []
    for item in _items(payload):
        meta = _dict_field(item, "metadata")
        node_spec = _dict_field(item, "spec")
        node_status = _dict_field(item, "status")
        info = _dict_field(node_status, "nodeInfo")
        node_labels = _dict_field(meta, "labels")

        node_name = meta.get("name")
        if not isinstance(node_name, str) or not node_name:
            continue

        raw_conditions = node_status.get("conditions")
        role_names = _node_roles(node_labels)
        pressure_flags = _node_pressure_conditions(raw_conditions)
        raw_created = meta.get("creationTimestamp")
        created = raw_created if isinstance(raw_created, str) else ""
        taint_rows = _node_taints(node_spec.get("taints"))

        row: dict[str, Any] = {
            "name": node_name,
            "ready": _node_ready(raw_conditions),
            "roles": role_names,
            "is_worker": _node_is_worker(node_labels),
            "labels": _node_labels(node_labels),
            "hardware": _hardware_hint(node_labels, info),
            "arch": info.get("architecture") or "",
            "os": info.get("operatingSystem") or "",
            "kernel": info.get("kernelVersion") or "",
            "kubelet": info.get("kubeletVersion") or "",
            "container_runtime": info.get("containerRuntimeVersion") or "",
            "addresses": _node_addresses(node_status),
            "created_at": created,
            "age_hours": _age_hours(created),
            "taints": taint_rows,
            "unschedulable": bool(node_spec.get("unschedulable")),
            "capacity": _node_capacity(node_status.get("capacity")),
            "allocatable": _node_capacity(node_status.get("allocatable")),
            "pressure": pressure_flags,
        }
        rows.append(row)

    rows.sort(key=lambda entry: entry.get("name") or "")
    return rows
|
|
|
|
|
|
def _age_hours(timestamp: str) -> float | None:
|
|
if not timestamp:
|
|
return None
|
|
try:
|
|
parsed = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
|
except ValueError:
|
|
return None
|
|
return round((datetime.now(timezone.utc) - parsed).total_seconds() / 3600, 1)
|
|
|
|
|
|
def _node_age_stats(details: list[dict[str, Any]]) -> dict[str, Any]:
|
|
ages: list[tuple[str, float]] = []
|
|
for node in details:
|
|
name = node.get("name") if isinstance(node, dict) else ""
|
|
age = node.get("age_hours")
|
|
if isinstance(name, str) and name and isinstance(age, (int, float)):
|
|
ages.append((name, float(age)))
|
|
if not ages:
|
|
return {}
|
|
ages.sort(key=lambda item: item[1])
|
|
values = [age for _, age in ages]
|
|
return {
|
|
"min": round(min(values), 1),
|
|
"max": round(max(values), 1),
|
|
"avg": round(sum(values) / len(values), 1),
|
|
"youngest": [{"name": name, "age_hours": age} for name, age in ages[:5]],
|
|
"oldest": [{"name": name, "age_hours": age} for name, age in ages[-5:]],
|
|
}
|
|
|
|
|
|
def _node_flagged(details: list[dict[str, Any]], key: str) -> list[str]:
|
|
names: list[str] = []
|
|
for node in details:
|
|
name = node.get("name") if isinstance(node, dict) else ""
|
|
if not isinstance(name, str) or not name:
|
|
continue
|
|
if node.get(key):
|
|
names.append(name)
|
|
names.sort()
|
|
return names
|
|
|
|
|
|
def _node_taints(raw: Any) -> list[dict[str, str]]:
|
|
if not isinstance(raw, list):
|
|
return []
|
|
taints: list[dict[str, str]] = []
|
|
for entry in raw:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
key = entry.get("key")
|
|
effect = entry.get("effect")
|
|
value = entry.get("value")
|
|
if isinstance(key, str) and isinstance(effect, str):
|
|
taints.append(
|
|
{
|
|
"key": key,
|
|
"value": value if isinstance(value, str) else "",
|
|
"effect": effect,
|
|
}
|
|
)
|
|
return taints
|
|
|
|
|
|
def _summarize_inventory(details: list[dict[str, Any]]) -> dict[str, Any]:
    """Aggregate node detail rows into a cluster inventory summary.

    Per-node counters are filled in by ``_apply_node_summary``. This function
    then adds the sorted not-ready names, sorts each pressure list, and
    attaches age statistics and the tainted / unschedulable name lists.
    """
    summary = {
        "total": 0,
        "ready": 0,
        "workers": {"total": 0, "ready": 0},
        "by_hardware": {},
        "by_arch": {},
        "by_role": {},
        "not_ready_names": [],
        "pressure_nodes": {key: [] for key in _PRESSURE_TYPES},
        "age_stats": {},
        "tainted_nodes": [],
        "unschedulable_nodes": [],
    }

    unready: list[str] = []
    for detail in details:
        counted_name = _apply_node_summary(summary, detail)
        # An empty name means the row was rejected and must not be reported.
        if not counted_name or detail.get("ready"):
            continue
        unready.append(counted_name)

    for flagged_names in summary["pressure_nodes"].values():
        flagged_names.sort()

    summary.update(
        not_ready_names=sorted(unready),
        age_stats=_node_age_stats(details),
        tainted_nodes=_node_flagged(details, "taints"),
        unschedulable_nodes=_node_flagged(details, "unschedulable"),
    )
    return summary
|
|
|
|
|
|
def _hardware_groups(details: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
groups: dict[str, list[str]] = {}
|
|
for node in details:
|
|
if not isinstance(node, dict):
|
|
continue
|
|
name = node.get("name")
|
|
if not isinstance(name, str) or not name:
|
|
continue
|
|
hardware = str(node.get("hardware") or "unknown")
|
|
groups.setdefault(hardware, []).append(name)
|
|
output: list[dict[str, Any]] = []
|
|
for hardware, nodes in groups.items():
|
|
nodes.sort()
|
|
output.append({"hardware": hardware, "count": len(nodes), "nodes": nodes})
|
|
output.sort(key=lambda item: (-(item.get("count") or 0), item.get("hardware") or ""))
|
|
return output
|
|
|
|
|
|
def _pressure_summary(nodes_summary: dict[str, Any]) -> dict[str, Any]:
|
|
pressure_nodes = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary, dict) else {}
|
|
summary: dict[str, Any] = {"by_type": {}, "total": 0}
|
|
if isinstance(pressure_nodes, dict):
|
|
for cond, names in pressure_nodes.items():
|
|
count = len(names) if isinstance(names, list) else 0
|
|
summary["by_type"][cond] = count
|
|
summary["total"] += count
|
|
unschedulable = nodes_summary.get("unschedulable_nodes") or []
|
|
summary["unschedulable"] = len(unschedulable) if isinstance(unschedulable, list) else 0
|
|
return summary
|
|
|
|
|
|
def _apply_node_summary(summary: dict[str, Any], node: dict[str, Any]) -> str:
|
|
name = node.get("name") if isinstance(node, dict) else ""
|
|
if not isinstance(name, str) or not name:
|
|
return ""
|
|
summary["total"] += 1
|
|
ready = bool(node.get("ready"))
|
|
if ready:
|
|
summary["ready"] += 1
|
|
if node.get("is_worker"):
|
|
summary["workers"]["total"] += 1
|
|
if ready:
|
|
summary["workers"]["ready"] += 1
|
|
hardware = node.get("hardware") or "unknown"
|
|
arch = node.get("arch") or "unknown"
|
|
summary["by_hardware"][hardware] = summary["by_hardware"].get(hardware, 0) + 1
|
|
summary["by_arch"][arch] = summary["by_arch"].get(arch, 0) + 1
|
|
for role in node.get("roles") or []:
|
|
summary["by_role"][role] = summary["by_role"].get(role, 0) + 1
|
|
_apply_pressure(summary, node, name)
|
|
return name
|
|
|
|
|
|
def _apply_pressure(summary: dict[str, Any], node: dict[str, Any], name: str) -> None:
|
|
pressure = node.get("pressure") or {}
|
|
if not isinstance(pressure, dict):
|
|
return
|
|
for cond_type, active in pressure.items():
|
|
if active and cond_type in summary["pressure_nodes"]:
|
|
summary["pressure_nodes"][cond_type].append(name)
|
|
|
|
|
|
def _node_capacity(raw: Any) -> dict[str, str]:
|
|
if not isinstance(raw, dict):
|
|
return {}
|
|
output: dict[str, str] = {}
|
|
for key in _CAPACITY_KEYS:
|
|
value = raw.get(key)
|
|
if isinstance(value, (str, int, float)) and value != "":
|
|
output[key] = str(value)
|
|
return output
|
|
|
|
|
|
def _node_pressure_conditions(conditions: Any) -> dict[str, bool]:
|
|
if not isinstance(conditions, list):
|
|
return {}
|
|
pressure: dict[str, bool] = {}
|
|
for condition in conditions:
|
|
if not isinstance(condition, dict):
|
|
continue
|
|
cond_type = condition.get("type")
|
|
if cond_type in _PRESSURE_TYPES:
|
|
pressure[cond_type] = condition.get("status") == "True"
|
|
return pressure
|
|
|
|
|
|
def _node_roles(labels: dict[str, Any]) -> list[str]:
|
|
roles: list[str] = []
|
|
for key in labels.keys():
|
|
if key.startswith("node-role.kubernetes.io/"):
|
|
role = key.split("/", 1)[-1]
|
|
if role:
|
|
roles.append(role)
|
|
return sorted(set(roles))
|
|
|
|
|
|
def _node_is_worker(labels: dict[str, Any]) -> bool:
|
|
if "node-role.kubernetes.io/control-plane" in labels:
|
|
return False
|
|
if "node-role.kubernetes.io/master" in labels:
|
|
return False
|
|
if "node-role.kubernetes.io/worker" in labels:
|
|
return True
|
|
return True
|
|
|
|
|
|
def _hardware_hint(labels: dict[str, Any], node_info: dict[str, Any]) -> str:
|
|
result = "unknown"
|
|
if str(labels.get("jetson") or "").lower() == "true":
|
|
result = "jetson"
|
|
else:
|
|
hardware = (labels.get("hardware") or "").strip().lower()
|
|
if hardware:
|
|
result = hardware
|
|
else:
|
|
kernel = str(node_info.get("kernelVersion") or "").lower()
|
|
os_image = str(node_info.get("osImage") or "").lower()
|
|
if "tegra" in kernel or "jetson" in os_image:
|
|
result = "jetson"
|
|
elif "raspi" in kernel or "bcm2711" in kernel:
|
|
result = "rpi"
|
|
else:
|
|
arch = str(node_info.get("architecture") or "").lower()
|
|
if arch == "amd64":
|
|
result = "amd64"
|
|
elif arch == "arm64":
|
|
result = "arm64-unknown"
|
|
return result
|
|
|
|
|
|
def _condition_status(conditions: Any, cond_type: str) -> tuple[bool | None, str, str]:
|
|
if not isinstance(conditions, list):
|
|
return None, "", ""
|
|
for condition in conditions:
|
|
if not isinstance(condition, dict):
|
|
continue
|
|
if condition.get("type") != cond_type:
|
|
continue
|
|
status = condition.get("status")
|
|
if status == "True":
|
|
return True, condition.get("reason") or "", condition.get("message") or ""
|
|
if status == "False":
|
|
return False, condition.get("reason") or "", condition.get("message") or ""
|
|
return None, condition.get("reason") or "", condition.get("message") or ""
|
|
return None, "", ""
|
|
|
|
# Export every single-underscore name present in this module's globals when
# this line runs, plus the two named contract types. Dunder names are
# excluded. Presumably this also re-exports helpers pulled in by the star
# import at the top of the file; confirm against that module.
# NOTE(review): keep this as a comprehension. A plain for-loop would add its
# loop variable to the globals() dict while that dict is being iterated.
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|