ariadne/tests/unit/services/test_cluster_state_context_domains.py

239 lines
11 KiB
Python

from __future__ import annotations
from ariadne.services import cluster_state_anomalies as anomalies
from ariadne.services import cluster_state_attention as attention
from ariadne.services import cluster_state_health as health
from ariadne.services import cluster_state_profiles as profiles
from ariadne.services import cluster_state_relationships as relationships
from ariadne.services import cluster_state_signals as signals
from ariadne.services.cluster_state_contract import SignalContext
def test_relationship_context_and_cross_stats() -> None:
    """Walk the relationship helpers through one consistent happy-path fixture.

    A single namespace ("apps"), workload ("api") and node ("titan-1") are fed
    through the namespace/node context builders, the workload maps and the
    cross-stat helpers. Each assertion pins one field of a helper's result.
    """
    pod_counts = [{"namespace": "apps", "pods_total": 4, "pods_running": 3, "primary_node": "titan-1"}]
    node_spread = [{"namespace": "apps", "nodes": {"titan-1": 3}, "primary_node": "titan-1"}]
    usage_rows = [
        {
            "namespace": "apps",
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_usage_ratio": 1.5,
            "mem_usage_ratio": 0.5,
        }
    ]
    namespace_baselines = {"apps": {"cpu": {"avg": 1.0}, "mem": {"avg": 2.0}}}
    ns_context = relationships._namespace_context(
        pod_counts,
        node_spread,
        usage_rows,
        namespace_baselines,
    )
    # CPU usage of 2.0 against a baseline average of 1.0 must be reported as 100.0.
    assert ns_context[0]["baseline_delta"]["cpu"] == 100.0
    top_namespaces = relationships._namespace_nodes_top(ns_context, 1)
    assert top_namespaces[0]["namespace"] == "apps"

    workload_rows = [{"namespace": "apps", "workload": "api", "nodes": {"titan-1": 2}, "pods_total": 2}]
    workloads_by_node = relationships._node_workload_map(workload_rows)
    # The per-node map is keyed by "<namespace>/<workload>".
    assert workloads_by_node["titan-1"] == {"apps/api": 2}
    top_workload_nodes = relationships._workload_nodes_top(workload_rows, 1)
    assert top_workload_nodes[0]["workload"] == "api"
    top_node_workloads = relationships._node_workloads_top(workloads_by_node)
    assert top_node_workloads[0]["node"] == "titan-1"
    indexed_workloads = relationships._workload_index(workload_rows)
    assert indexed_workloads[0]["workload"] == "api"

    node_details = [
        {
            "name": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
        }
    ]
    node_load = [{"node": "titan-1", "cpu": 80.0, "ram": 40.0, "load_index": 0.9}]
    node_baselines = {"titan-1": {"cpu": {"avg": 40.0}, "ram": {"avg": 20.0}}}
    nodes_ctx = relationships._node_context(
        node_details,
        node_load,
        node_baselines,
        workloads_by_node,
    )
    # CPU of 80.0 against a baseline average of 40.0 must be reported as 100.0.
    assert nodes_ctx[0]["baseline_delta"]["cpu"] == 100.0

    metric_snapshot = {
        "node_usage": {"cpu": [{"node": "titan-1", "value": 80.0}]},
        "namespace_top": {"cpu": [{"namespace": "apps", "value": 2.0}]},
        "pvc_usage_top": [{"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 91.0}],
    }
    node_metric_top = relationships._cross_node_metric_top(metric_snapshot, nodes_ctx)
    assert node_metric_top[0]["node"] == "titan-1"
    namespace_metric_top = relationships._cross_namespace_metric_top(metric_snapshot, ns_context)
    assert namespace_metric_top[0]["namespace"] == "apps"
    cross_stats = relationships._build_cross_stats(metric_snapshot, nodes_ctx, ns_context, workload_rows)
    # Only non-emptiness is checked for the aggregated stats and the lexicon alias.
    assert cross_stats["node_metric_top"]
    lexicon = relationships._build_lexicon()
    assert lexicon["aliases"]["hot node"]
    cpu_deltas = relationships._delta_top(nodes_ctx, "cpu")
    assert cpu_deltas[0]["severity"] == "critical"
    reasons = relationships._reason_top({"OOMKilled": 2})
    assert reasons[0]["reason"] == "OOMKilled"
def test_relationship_edge_filters_and_baseline_helpers(monkeypatch) -> None:
    """Feed malformed input to the relationship helpers and pin what survives.

    Fixtures mix ``None`` entries, empty names, non-mapping ``nodes`` values and
    non-numeric counts with one or two valid rows. Statement order matters:
    ``_top_named_entries`` is asserted directly first, then monkeypatched for
    the remainder of the test.
    """
    # Entries without a usable label produce no named rows.
    unlabeled_vector = [None, {"metric": {}, "value": 1}]
    assert relationships._vector_to_named(unlabeled_vector, "node", "node") == []
    labeled_vector = [
        {"metric": {"node": "titan-1"}, "value": 2},
        {"metric": {"node": "titan-2"}, "value": 1},
    ]
    named_rows = relationships._vector_to_named(labeled_vector, "node", "node")
    assert named_rows[0]["node"] == "titan-1"

    pvc_vector = [
        {"metric": {}},
        {"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 90},
    ]
    assert relationships._pvc_top(pvc_vector) == [
        {"namespace": "apps", "pvc": "data", "used_percent": 90}
    ]

    ns_context = relationships._namespace_context(
        [None, {"namespace": ""}, {"namespace": "apps", "pods_total": 1}],
        [{"namespace": "apps", "nodes": "bad"}],
        [{"namespace": "apps", "cpu_usage": "bad", "mem_usage": 2.0}],
        "bad",
    )
    # The first entry returned must be the only well-formed namespace.
    assert ns_context[0]["namespace"] == "apps"
    top_namespaces = relationships._namespace_nodes_top([None, ns_context[0]], 2)
    assert top_namespaces[0]["namespace"] == "apps"

    workload_rows = [
        None,
        {"namespace": "apps", "nodes": {"titan-1": 1}},
        {"namespace": "apps", "workload": "api", "nodes": "bad"},
        {"namespace": "apps", "workload": "api", "nodes": {"": 1, "titan-1": "2", "titan-2": "bad", "titan-3": 0}},
        {"workload": "solo", "nodes": {"titan-2": 1}},
    ]
    workloads_by_node = relationships._node_workload_map(workload_rows)
    expected_node_map = {"titan-1": {"apps/api": 2}, "titan-2": {"solo": 1}}
    assert workloads_by_node == expected_node_map
    mixed_node_map = {"": {}, "titan-1": "bad", "titan-2": {"solo": 1}}
    top_node_workloads = relationships._node_workloads_top(mixed_node_map, limit_nodes=2)
    assert top_node_workloads[0]["node"] == "titan-2"
    indexed = relationships._workload_index([{"workload": "api", "pods_total": 1, "nodes": "bad"}])
    assert indexed[0]["nodes_top"] == []

    assert relationships._events_summary("bad") == {}
    event_payload = {"warnings_total": 2, "warnings_by_namespace": {"apps": 2, "db": 1}, "warnings_recent": [1, 2]}
    summary = relationships._events_summary(event_payload)
    assert summary["top_namespace"] == {"namespace": "apps", "count": 2}

    raw_entries = [None, {"node": ""}, {"node": "n1", "value": "bad"}, {"node": "n2", "value": "2"}]
    assert relationships._top_named_entries(raw_entries, "node", 3) == [
        {"name": "n2", "value": 2.0},
        {"name": "n1", "value": 0.0},
    ]

    def _blank_named_entries(*_args):
        """Stand-in for ``_top_named_entries`` that yields one empty-name row."""
        return [{"name": ""}]

    # From here on the helper is stubbed; the cross-metric helpers must return
    # nothing when the only candidate row has an empty name.
    monkeypatch.setattr(relationships, "_top_named_entries", _blank_named_entries)
    assert relationships._cross_node_metric_top({"node_usage": {"cpu": [{}]}}, []) == []
    assert relationships._cross_namespace_metric_top({"namespace_top": {"cpu": [{}]}}, []) == []

    nodes_ctx = relationships._node_context(
        [None, {"name": ""}, {"name": "titan-1", "pressure": ["DiskPressure"]}],
        [{"node": "titan-1", "cpu": "bad", "load_index": 1.0}],
        "bad",
        "bad",
    )
    assert nodes_ctx[0]["node"] == "titan-1"

    # Baseline helpers: a non-numeric value or a zero average gives no delta.
    assert relationships._baseline_delta("bad", {"avg": 1}) is None
    assert relationships._baseline_delta(1, {"avg": 0}) is None
    assert relationships._delta_severity(50) == "warning"
    assert relationships._delta_severity(5) == "info"
    assert relationships._delta_entry_label({"namespace": "apps"}) == ("namespace", "apps")
    delta_rows = relationships._delta_top([None, {"namespace": "apps", "baseline_delta": {"cpu": 5}}], "cpu")
    assert delta_rows[0]["namespace"] == "apps"
    reason_rows = relationships._reason_top({"": 1, "OOMKilled": "bad", "BackOff": 2})
    assert reason_rows == [{"reason": "BackOff", "count": 2}]
def test_health_anomaly_signal_profile_and_attention_domains() -> None:
    """Smoke-test the anomaly, health, signal, profile and attention builders.

    One degraded-cluster fixture (one of two nodes ready, pending and failed
    pods, a PVC at 92.0, a not-ready deployment and kustomization) is shared by
    all five modules. Each assertion pins one representative output field.
    """
    metrics = {
        "nodes_total": 2,
        "nodes_ready": 1,
        "pods_running": 8,
        "pods_pending": 2,
        "pods_failed": 1,
        "job_failures_24h": [{"value": 1, "metric": {"job_name": "job"}}],
        "pvc_usage_top": [{"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 92.0}],
        "top_restarts_1h": [{"metric": {"namespace": "apps", "pod": "api"}, "value": 3}],
    }
    nodes_summary = {"pressure_nodes": {"DiskPressure": ["titan-1"]}, "unschedulable_nodes": ["titan-2"]}
    workloads_health = {
        "deployments": {"not_ready": 1, "items": [{"namespace": "apps", "name": "api", "desired": 2, "ready": 1}]},
        "statefulsets": {"not_ready": 0, "items": []},
        "daemonsets": {"not_ready": 0, "items": []},
    }
    pod_issues = {
        "pending_over_15m": 2,
        "counts": {"Failed": 1},
        "waiting_reasons": {"CrashLoopBackOff": 3},
        "phase_reasons": {"Evicted": 1},
    }
    kustomizations = {"not_ready": 1, "items": [{"name": "apps"}]}
    events = {"warnings_total": 1, "warnings": [{"reason": "BackOff"}]}

    # --- anomalies ---
    anomaly_rows = anomalies._build_anomalies(metrics, nodes_summary, workloads_health, kustomizations, events)
    observed_kinds = {row["kind"] for row in anomaly_rows}
    required_kinds = {"pods_pending", "pvc_pressure", "flux_not_ready"}
    assert observed_kinds >= required_kinds
    assert anomalies._severity_rank("critical") == 0
    pvc_signals = anomalies._pvc_pressure_signals(metrics)
    assert pvc_signals[0]["target"] == "apps/data"

    # --- health ---
    bullets = health._health_bullets(metrics, nodes_summary, workloads_health, anomaly_rows)
    assert bullets[0] == "Nodes ready: 1/2"
    not_ready_items = health._workload_not_ready_items(workloads_health)
    assert not_ready_items[0]["name"] == "api"
    restart_rows = health._pod_restarts_top(metrics)
    assert restart_rows[0]["pod"] == "api"

    # --- signals ---
    node_context = [
        {
            "node": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
            "cpu": 90.0,
            "ram": 85.0,
            "disk": 95.0,
            "net": 50.0,
            "io": 60.0,
            "load_index": 0.95,
            "baseline": {"net": {"max": 10.0}, "io": {"max": 20.0}},
            "baseline_delta": {"cpu": 100.0},
            "pressure_flags": ["DiskPressure"],
        }
    ]
    namespace_context = [
        {
            "namespace": "apps",
            "pods_total": 4,
            "pods_running": 3,
            "primary_node": "titan-1",
            "nodes_top": [("titan-1", 4)],
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_ratio": 1.5,
            "mem_ratio": 0.5,
            "baseline_delta": {"cpu": 100.0},
            "baseline": {"cpu": {"avg": 1.0}},
        }
    ]
    context = SignalContext(metrics, node_context, namespace_context, workloads_health, pod_issues, kustomizations)
    issue_summary = signals._pod_issue_summary(pod_issues, metrics)
    assert issue_summary["waiting_reasons_top"][0]["reason"] == "CrashLoopBackOff"
    # Only non-emptiness of the built signals is checked.
    assert signals._build_signals(context)

    # --- profiles ---
    node_pods = [{"node": "titan-1", "pods_total": 4, "pods_running": 3, "namespaces_top": [("apps", 4)]}]
    node_workloads = {"titan-1": {"apps/api": 2}}
    workloads = [{"namespace": "apps", "workload": "api", "pods_total": 2, "pods_running": 1, "nodes": {"titan-1": 2}}]
    built_profiles = profiles._build_profiles(node_context, namespace_context, node_pods, workloads, node_workloads)
    first_node_profile = built_profiles["nodes"][0]
    assert first_node_profile["node"] == "titan-1"
    first_namespace_profile = built_profiles["namespaces"][0]
    assert first_namespace_profile["namespace"] == "apps"
    first_workload_profile = built_profiles["workloads"][0]
    assert first_workload_profile["workload"] == "api"

    # --- attention ---
    ranked = attention._build_attention_ranked(metrics, node_context, pod_issues, workloads_health)
    assert ranked[0]["score"] > 0
    node_score = attention._node_attention_score(node_context[0])
    assert node_score[0] > 0
def test_profile_builders_filter_bad_nodes_and_workload_nodes() -> None:
    """Check that the profile builders drop unusable rows and order the rest.

    Node input includes ``None``, an empty name and a non-string name; only the
    two named nodes are expected back. Workload input includes ``None`` and a
    workload whose ``nodes`` value is not a mapping.
    """
    node_rows = [
        None,
        {"node": ""},
        {"node": 123},
        {"node": "titan-2", "load_index": 1.0},
        {"node": "titan-1", "load_index": 2.0},
    ]
    pod_rows = [{"node": "titan-1", "pods_total": 3, "pods_running": 2}]
    workloads_by_node = {"titan-1": {"apps/api": 2, "apps/worker": 1}}
    node_profiles = profiles._node_profiles(node_rows, pod_rows, workloads_by_node)

    # titan-1 is expected ahead of titan-2. NOTE(review): the fixture cannot
    # tell descending load_index from name order; confirm against the builder.
    returned_nodes = [profile["node"] for profile in node_profiles]
    assert returned_nodes == ["titan-1", "titan-2"]
    titan_1, titan_2 = node_profiles[0], node_profiles[1]
    assert titan_1["pods_total"] == 3
    assert titan_1["workloads_top"] == [("apps/api", 2), ("apps/worker", 1)]
    # titan-2 has no pod row, so its namespace ranking must be empty.
    assert titan_2["namespaces_top"] == []

    workload_rows = [
        None,
        {"namespace": "apps", "workload": "api", "pods_total": 2, "nodes": {"titan-2": 1, "titan-1": 2}},
        {"namespace": "apps", "workload": "bad", "pods_total": 1, "nodes": "not-a-map"},
    ]
    workload_profiles = profiles._workload_profiles(workload_rows)
    # The node with the larger count (titan-1: 2) is listed first.
    assert workload_profiles[0]["nodes_top"] == [("titan-1", 2), ("titan-2", 1)]
    # A non-mapping "nodes" value yields an empty ranking; the row is kept.
    assert workload_profiles[1]["nodes_top"] == []