"""Tests for the cluster-state relationship, health, anomaly, signal, profile and attention helpers."""

from __future__ import annotations

from ariadne.services import cluster_state_anomalies as anomalies
from ariadne.services import cluster_state_attention as attention
from ariadne.services import cluster_state_health as health
from ariadne.services import cluster_state_profiles as profiles
from ariadne.services import cluster_state_relationships as relationships
from ariadne.services import cluster_state_signals as signals
from ariadne.services.cluster_state_contract import SignalContext


def test_relationship_context_and_cross_stats() -> None:
    """Run the relationship helpers over a one-node, one-namespace fixture."""
    pod_rows = [{"namespace": "apps", "pods_total": 4, "pods_running": 3, "primary_node": "titan-1"}]
    placement_rows = [{"namespace": "apps", "nodes": {"titan-1": 3}, "primary_node": "titan-1"}]
    usage_rows = [
        {
            "namespace": "apps",
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_usage_ratio": 1.5,
            "mem_usage_ratio": 0.5,
        }
    ]
    namespace_baselines = {"apps": {"cpu": {"avg": 1.0}, "mem": {"avg": 2.0}}}
    ns_context = relationships._namespace_context(pod_rows, placement_rows, usage_rows, namespace_baselines)

    # Usage of 2.0 against a baseline average of 1.0 is expected to report 100.0.
    first_namespace = ns_context[0]
    assert first_namespace["baseline_delta"]["cpu"] == 100.0
    top_namespaces = relationships._namespace_nodes_top(ns_context, 1)
    assert top_namespaces[0]["namespace"] == "apps"

    workload_rows = [{"namespace": "apps", "workload": "api", "nodes": {"titan-1": 2}, "pods_total": 2}]
    per_node = relationships._node_workload_map(workload_rows)
    assert per_node["titan-1"] == {"apps/api": 2}
    assert relationships._workload_nodes_top(workload_rows, 1)[0]["workload"] == "api"
    assert relationships._node_workloads_top(per_node)[0]["node"] == "titan-1"
    assert relationships._workload_index(workload_rows)[0]["workload"] == "api"

    node_details = [
        {
            "name": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
        }
    ]
    node_load = [{"node": "titan-1", "cpu": 80.0, "ram": 40.0, "load_index": 0.9}]
    node_baselines = {"titan-1": {"cpu": {"avg": 40.0}, "ram": {"avg": 20.0}}}
    nd_context = relationships._node_context(node_details, node_load, node_baselines, per_node)
    assert nd_context[0]["baseline_delta"]["cpu"] == 100.0

    metric_payload = {
        "node_usage": {"cpu": [{"node": "titan-1", "value": 80.0}]},
        "namespace_top": {"cpu": [{"namespace": "apps", "value": 2.0}]},
        "pvc_usage_top": [{"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 91.0}],
    }
    node_hits = relationships._cross_node_metric_top(metric_payload, nd_context)
    assert node_hits[0]["node"] == "titan-1"
    namespace_hits = relationships._cross_namespace_metric_top(metric_payload, ns_context)
    assert namespace_hits[0]["namespace"] == "apps"
    cross_stats = relationships._build_cross_stats(metric_payload, nd_context, ns_context, workload_rows)
    assert cross_stats["node_metric_top"]
    assert relationships._build_lexicon()["aliases"]["hot node"]
    assert relationships._delta_top(nd_context, "cpu")[0]["severity"] == "critical"
    assert relationships._reason_top({"OOMKilled": 2})[0]["reason"] == "OOMKilled"


def test_relationship_edge_filters_and_baseline_helpers(monkeypatch) -> None:
    """Feed malformed and partial inputs to the relationship helpers and check what survives."""
    assert relationships._vector_to_named([None, {"metric": {}, "value": 1}], "node", "node") == []
    two_node_vector = [
        {"metric": {"node": "titan-1"}, "value": 2},
        {"metric": {"node": "titan-2"}, "value": 1},
    ]
    named_nodes = relationships._vector_to_named(two_node_vector, "node", "node")
    assert named_nodes[0]["node"] == "titan-1"

    pvc_vector = [
        {"metric": {}},
        {"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 90},
    ]
    assert relationships._pvc_top(pvc_vector) == [{"namespace": "apps", "pvc": "data", "used_percent": 90}]

    ns_context = relationships._namespace_context(
        [None, {"namespace": ""}, {"namespace": "apps", "pods_total": 1}],
        [{"namespace": "apps", "nodes": "bad"}],
        [{"namespace": "apps", "cpu_usage": "bad", "mem_usage": 2.0}],
        "bad",
    )
    assert ns_context[0]["namespace"] == "apps"
    assert relationships._namespace_nodes_top([None, ns_context[0]], 2)[0]["namespace"] == "apps"

    workload_rows = [
        None,
        {"namespace": "apps", "nodes": {"titan-1": 1}},
        {"namespace": "apps", "workload": "api", "nodes": "bad"},
        {"namespace": "apps", "workload": "api", "nodes": {"": 1, "titan-1": "2", "titan-2": "bad", "titan-3": 0}},
        {"workload": "solo", "nodes": {"titan-2": 1}},
    ]
    per_node = relationships._node_workload_map(workload_rows)
    assert per_node == {"titan-1": {"apps/api": 2}, "titan-2": {"solo": 1}}

    mixed_node_map = {"": {}, "titan-1": "bad", "titan-2": {"solo": 1}}
    assert relationships._node_workloads_top(mixed_node_map, limit_nodes=2)[0]["node"] == "titan-2"
    indexed = relationships._workload_index([{"workload": "api", "pods_total": 1, "nodes": "bad"}])
    assert indexed[0]["nodes_top"] == []

    assert relationships._events_summary("bad") == {}
    event_payload = {"warnings_total": 2, "warnings_by_namespace": {"apps": 2, "db": 1}, "warnings_recent": [1, 2]}
    assert relationships._events_summary(event_payload)["top_namespace"] == {"namespace": "apps", "count": 2}

    raw_entries = [None, {"node": ""}, {"node": "n1", "value": "bad"}, {"node": "n2", "value": "2"}]
    assert relationships._top_named_entries(raw_entries, "node", 3) == [
        {"name": "n2", "value": 2.0},
        {"name": "n1", "value": 0.0},
    ]

    def _blank_named_entries(*_args):
        # Stand-in for _top_named_entries that always yields one entry with an empty name.
        return [{"name": ""}]

    # From here on the module-level helper is replaced for the rest of this test.
    monkeypatch.setattr(relationships, "_top_named_entries", _blank_named_entries)
    assert relationships._cross_node_metric_top({"node_usage": {"cpu": [{}]}}, []) == []
    assert relationships._cross_namespace_metric_top({"namespace_top": {"cpu": [{}]}}, []) == []

    nd_context = relationships._node_context(
        [None, {"name": ""}, {"name": "titan-1", "pressure": ["DiskPressure"]}],
        [{"node": "titan-1", "cpu": "bad", "load_index": 1.0}],
        "bad",
        "bad",
    )
    assert nd_context[0]["node"] == "titan-1"

    assert relationships._baseline_delta("bad", {"avg": 1}) is None
    assert relationships._baseline_delta(1, {"avg": 0}) is None
    assert relationships._delta_severity(50) == "warning"
    assert relationships._delta_severity(5) == "info"
    assert relationships._delta_entry_label({"namespace": "apps"}) == ("namespace", "apps")
    delta_rows = [None, {"namespace": "apps", "baseline_delta": {"cpu": 5}}]
    assert relationships._delta_top(delta_rows, "cpu")[0]["namespace"] == "apps"
    assert relationships._reason_top({"": 1, "OOMKilled": "bad", "BackOff": 2}) == [{"reason": "BackOff", "count": 2}]


def test_health_anomaly_signal_profile_and_attention_domains() -> None:
    """Drive the anomaly, health, signal, profile and attention builders from one shared fixture."""
    metric_payload = {
        "nodes_total": 2,
        "nodes_ready": 1,
        "pods_running": 8,
        "pods_pending": 2,
        "pods_failed": 1,
        "job_failures_24h": [{"value": 1, "metric": {"job_name": "job"}}],
        "pvc_usage_top": [{"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 92.0}],
        "top_restarts_1h": [{"metric": {"namespace": "apps", "pod": "api"}, "value": 3}],
    }
    node_summary = {"pressure_nodes": {"DiskPressure": ["titan-1"]}, "unschedulable_nodes": ["titan-2"]}
    workload_health = {
        "deployments": {"not_ready": 1, "items": [{"namespace": "apps", "name": "api", "desired": 2, "ready": 1}]},
        "statefulsets": {"not_ready": 0, "items": []},
        "daemonsets": {"not_ready": 0, "items": []},
    }
    pod_issue_payload = {
        "pending_over_15m": 2,
        "counts": {"Failed": 1},
        "waiting_reasons": {"CrashLoopBackOff": 3},
        "phase_reasons": {"Evicted": 1},
    }
    kustomization_payload = {"not_ready": 1, "items": [{"name": "apps"}]}
    event_payload = {"warnings_total": 1, "warnings": [{"reason": "BackOff"}]}

    found = anomalies._build_anomalies(
        metric_payload,
        node_summary,
        workload_health,
        kustomization_payload,
        event_payload,
    )
    found_kinds = {entry["kind"] for entry in found}
    assert found_kinds.issuperset({"pods_pending", "pvc_pressure", "flux_not_ready"})
    assert anomalies._severity_rank("critical") == 0
    assert anomalies._pvc_pressure_signals(metric_payload)[0]["target"] == "apps/data"

    summary_lines = health._health_bullets(metric_payload, node_summary, workload_health, found)
    assert summary_lines[0] == "Nodes ready: 1/2"
    assert health._workload_not_ready_items(workload_health)[0]["name"] == "api"
    assert health._pod_restarts_top(metric_payload)[0]["pod"] == "api"

    nd_context = [
        {
            "node": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
            "cpu": 90.0,
            "ram": 85.0,
            "disk": 95.0,
            "net": 50.0,
            "io": 60.0,
            "load_index": 0.95,
            "baseline": {"net": {"max": 10.0}, "io": {"max": 20.0}},
            "baseline_delta": {"cpu": 100.0},
            "pressure_flags": ["DiskPressure"],
        }
    ]
    ns_context = [
        {
            "namespace": "apps",
            "pods_total": 4,
            "pods_running": 3,
            "primary_node": "titan-1",
            "nodes_top": [("titan-1", 4)],
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_ratio": 1.5,
            "mem_ratio": 0.5,
            "baseline_delta": {"cpu": 100.0},
            "baseline": {"cpu": {"avg": 1.0}},
        }
    ]
    signal_context = SignalContext(
        metric_payload,
        nd_context,
        ns_context,
        workload_health,
        pod_issue_payload,
        kustomization_payload,
    )
    issue_summary = signals._pod_issue_summary(pod_issue_payload, metric_payload)
    assert issue_summary["waiting_reasons_top"][0]["reason"] == "CrashLoopBackOff"
    assert signals._build_signals(signal_context)

    pods_by_node = [{"node": "titan-1", "pods_total": 4, "pods_running": 3, "namespaces_top": [("apps", 4)]}]
    per_node = {"titan-1": {"apps/api": 2}}
    workload_rows = [
        {"namespace": "apps", "workload": "api", "pods_total": 2, "pods_running": 1, "nodes": {"titan-1": 2}}
    ]
    profile_bundle = profiles._build_profiles(nd_context, ns_context, pods_by_node, workload_rows, per_node)
    assert profile_bundle["nodes"][0]["node"] == "titan-1"
    assert profile_bundle["namespaces"][0]["namespace"] == "apps"
    assert profile_bundle["workloads"][0]["workload"] == "api"

    attention_rows = attention._build_attention_ranked(metric_payload, nd_context, pod_issue_payload, workload_health)
    assert attention_rows[0]["score"] > 0
    assert attention._node_attention_score(nd_context[0])[0] > 0


def test_profile_builders_filter_bad_nodes_and_workload_nodes() -> None:
    """Check the profile builders on inputs that mix valid rows with None and wrongly typed values."""
    raw_nodes = [
        None,
        {"node": ""},
        {"node": 123},
        {"node": "titan-2", "load_index": 1.0},
        {"node": "titan-1", "load_index": 2.0},
    ]
    pods_by_node = [{"node": "titan-1", "pods_total": 3, "pods_running": 2}]
    per_node = {"titan-1": {"apps/api": 2, "apps/worker": 1}}
    built_nodes = profiles._node_profiles(raw_nodes, pods_by_node, per_node)

    # Only the two string-named nodes are expected, with titan-1 (load_index 2.0) first.
    node_names = [entry["node"] for entry in built_nodes]
    assert node_names == ["titan-1", "titan-2"]
    assert built_nodes[0]["pods_total"] == 3
    assert built_nodes[0]["workloads_top"] == [("apps/api", 2), ("apps/worker", 1)]
    assert built_nodes[1]["namespaces_top"] == []

    raw_workloads = [
        None,
        {"namespace": "apps", "workload": "api", "pods_total": 2, "nodes": {"titan-2": 1, "titan-1": 2}},
        {"namespace": "apps", "workload": "bad", "pods_total": 1, "nodes": "not-a-map"},
    ]
    built_workloads = profiles._workload_profiles(raw_workloads)
    assert built_workloads[0]["nodes_top"] == [("titan-1", 2), ("titan-2", 1)]
    assert built_workloads[1]["nodes_top"] == []


def test_health_helpers_handle_clean_and_malformed_inputs() -> None:
    """Check the health helpers on an all-ready cluster and on partially malformed payloads."""
    pod_counts = {"pods_running": 1, "pods_pending": 0, "pods_failed": 0}
    workload_health = {"deployments": {"not_ready": 0}, "statefulsets": "bad", "daemonsets": {}}
    summary_lines = health._health_bullets(pod_counts, {}, workload_health, [{"summary": ""}])
    assert summary_lines == ["Pods: 1 running, 0 pending, 0 failed", "Workloads: all ready"]

    not_ready_payload = {
        "deployments": {"items": [None, {"namespace": "apps", "name": "api", "desired": 2, "ready": 1}]},
        "statefulsets": {"items": "bad"},
    }
    not_ready = health._workload_not_ready_items(not_ready_payload)
    assert not_ready == [{"kind": "deployment", "namespace": "apps", "name": "api", "desired": 2, "ready": 1}]

    restart_payload = {
        "top_restarts_1h": [
            None,
            {"metric": {"namespace": "apps"}, "value": 3},
            {"metric": {"namespace": "apps", "pod": "api"}, "value": 2},
        ]
    }
    top_restarts = health._pod_restarts_top(restart_payload)
    assert top_restarts == [{"namespace": "apps", "pod": "api", "value": 2}]