_LOAD_TOP_COUNT = 5
_NAMESPACE_TOP_COUNT = 5

# Metric keys that are averaged per hardware class. Kept in one place so the
# bucket layout and the collection loop cannot drift apart.
_HARDWARE_USAGE_KEYS = ("load_index", "cpu", "ram", "net", "io")


def _node_usage_by_hardware(
    node_load: list[dict[str, Any]],
    node_details: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Average per-node load metrics grouped by hardware class.

    Args:
        node_load: Per-node load entries. Each usable entry is a dict with a
            non-empty string ``"node"`` key and optional numeric values under
            the keys in ``_HARDWARE_USAGE_KEYS``.
        node_details: Node descriptions; dicts with a ``"name"`` and an
            optional ``"hardware"`` value.

    Returns:
        One row per hardware class, shaped ``{"hardware": <class>, <metric>:
        <mean>, ...}``. A metric is omitted from a row when no node of that
        class reported a usable value for it. Rows are ordered by descending
        ``load_index`` (missing counts as 0), then by hardware name. An empty
        list is returned when either input is empty.
    """
    if not node_load or not node_details:
        return []
    hardware_by_node = _hardware_map(node_details)
    buckets: dict[str, dict[str, list[float]]] = {}
    for entry in node_load:
        if not isinstance(entry, dict):
            continue
        node = entry.get("node")
        if not isinstance(node, str) or not node:
            continue
        # Nodes without a details record are still counted, under "unknown".
        hardware = hardware_by_node.get(node, "unknown")
        _append_hardware_usage(buckets, hardware, entry)
    return _finalize_hardware_usage(buckets)


def _hardware_map(node_details: list[dict[str, Any]]) -> dict[str, str]:
    """Map node name to hardware class, using "unknown" when it is missing or falsy."""
    mapping: dict[str, str] = {}
    for node in node_details:
        if not isinstance(node, dict):
            continue
        name = node.get("name")
        if isinstance(name, str) and name:
            mapping[name] = str(node.get("hardware") or "unknown")
    return mapping


def _append_hardware_usage(
    buckets: dict[str, dict[str, list[float]]],
    hardware: str,
    entry: dict[str, Any],
) -> None:
    """Append the usable metric samples of ``entry`` to the ``hardware`` bucket.

    The bucket is created on first use, so a hardware class appears in the
    result even when none of its nodes reported a usable metric. Values that
    are not real numbers are skipped: booleans (``bool`` is a subclass of
    ``int``) and non-finite floats, because a single NaN/inf sample would
    otherwise turn the whole class average into NaN/inf.
    """
    import math  # local import: keeps this block independent of the file header

    bucket = buckets.setdefault(hardware, {key: [] for key in _HARDWARE_USAGE_KEYS})
    for key in _HARDWARE_USAGE_KEYS:
        value = entry.get(key)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            continue
        sample = float(value)
        if math.isfinite(sample):
            bucket[key].append(sample)


def _finalize_hardware_usage(
    buckets: dict[str, dict[str, list[float]]],
) -> list[dict[str, Any]]:
    """Reduce each bucket to its per-metric mean and return the sorted rows."""
    output: list[dict[str, Any]] = []
    for hardware, metrics in buckets.items():
        row: dict[str, Any] = {"hardware": hardware}
        for key, values in metrics.items():
            if values:  # leave the metric out rather than divide by zero
                row[key] = sum(values) / len(values)
        output.append(row)
    # Busiest hardware class first; the name breaks ties so the order is stable.
    output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
    return output
@dataclass(frozen=True) class ClusterStateSummary: nodes_total: int | None @@ -1771,6 +1818,7 @@ def collect_cluster_state() -> tuple[dict[str, Any], ClusterStateSummary]: node_pods, ) metrics["node_load_summary"] = _node_load_summary(metrics.get("node_load", [])) + metrics["node_load_by_hardware"] = _node_usage_by_hardware(metrics.get("node_load", []), node_details) snapshot = { "collected_at": collected_at.isoformat(),