149 lines
6.7 KiB
Python
149 lines
6.7 KiB
Python
from __future__ import annotations
|
|
|
|
from ariadne.services import cluster_state_anomalies as anomalies
|
|
from ariadne.services import cluster_state_attention as attention
|
|
from ariadne.services import cluster_state_health as health
|
|
from ariadne.services import cluster_state_profiles as profiles
|
|
from ariadne.services import cluster_state_relationships as relationships
|
|
from ariadne.services import cluster_state_signals as signals
|
|
from ariadne.services.cluster_state_contract import SignalContext
|
|
|
|
|
|
def test_relationship_context_and_cross_stats() -> None:
    """Exercise the relationship helpers on a one-node, one-namespace fixture.

    Builds namespace and node context rows, then checks the top-N views, the
    node/workload maps, the cross statistics, the lexicon, and the delta and
    reason rankings.
    """
    # Namespace inputs: cpu_usage is 2.0 against a baseline average of 1.0,
    # and the resulting context row is expected to report a cpu delta of 100.0.
    ns_pod_rows = [
        {"namespace": "apps", "pods_total": 4, "pods_running": 3, "primary_node": "titan-1"},
    ]
    ns_node_rows = [
        {"namespace": "apps", "nodes": {"titan-1": 3}, "primary_node": "titan-1"},
    ]
    ns_usage_rows = [
        {
            "namespace": "apps",
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_usage_ratio": 1.5,
            "mem_usage_ratio": 0.5,
        },
    ]
    ns_baselines = {"apps": {"cpu": {"avg": 1.0}, "mem": {"avg": 2.0}}}
    ns_ctx = relationships._namespace_context(
        ns_pod_rows,
        ns_node_rows,
        ns_usage_rows,
        ns_baselines,
    )
    first_ns = ns_ctx[0]
    assert first_ns["baseline_delta"]["cpu"] == 100.0
    ns_nodes_top = relationships._namespace_nodes_top(ns_ctx, 1)
    assert ns_nodes_top[0]["namespace"] == "apps"

    # Workload inputs: a single workload with two pods on titan-1.
    workload_rows = [
        {"namespace": "apps", "workload": "api", "nodes": {"titan-1": 2}, "pods_total": 2},
    ]
    workloads_by_node = relationships._node_workload_map(workload_rows)
    assert workloads_by_node["titan-1"] == {"apps/api": 2}
    workload_nodes_top = relationships._workload_nodes_top(workload_rows, 1)
    assert workload_nodes_top[0]["workload"] == "api"
    node_workloads_top = relationships._node_workloads_top(workloads_by_node)
    assert node_workloads_top[0]["node"] == "titan-1"
    workload_index = relationships._workload_index(workload_rows)
    assert workload_index[0]["workload"] == "api"

    # Node inputs: cpu is 80.0 against a baseline average of 40.0, and the
    # resulting context row is expected to report a cpu delta of 100.0.
    node_rows = [
        {
            "name": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
        },
    ]
    node_load_rows = [{"node": "titan-1", "cpu": 80.0, "ram": 40.0, "load_index": 0.9}]
    node_baselines = {"titan-1": {"cpu": {"avg": 40.0}, "ram": {"avg": 20.0}}}
    node_ctx = relationships._node_context(
        node_rows,
        node_load_rows,
        node_baselines,
        workloads_by_node,
    )
    assert node_ctx[0]["baseline_delta"]["cpu"] == 100.0

    cluster_metrics = {
        "node_usage": {"cpu": [{"node": "titan-1", "value": 80.0}]},
        "namespace_top": {"cpu": [{"namespace": "apps", "value": 2.0}]},
        "pvc_usage_top": [
            {"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 91.0},
        ],
    }
    node_metric_top = relationships._cross_node_metric_top(cluster_metrics, node_ctx)
    assert node_metric_top[0]["node"] == "titan-1"
    ns_metric_top = relationships._cross_namespace_metric_top(cluster_metrics, ns_ctx)
    assert ns_metric_top[0]["namespace"] == "apps"
    cross_stats = relationships._build_cross_stats(cluster_metrics, node_ctx, ns_ctx, workload_rows)
    assert cross_stats["node_metric_top"]

    lexicon = relationships._build_lexicon()
    assert lexicon["aliases"]["hot node"]
    cpu_deltas = relationships._delta_top(node_ctx, "cpu")
    assert cpu_deltas[0]["severity"] == "critical"
    reasons = relationships._reason_top({"OOMKilled": 2})
    assert reasons[0]["reason"] == "OOMKilled"
|
|
|
|
|
|
def test_health_anomaly_signal_profile_and_attention_domains() -> None:
    """Run the anomaly, health, signal, profile, and attention builders on one shared fixture.

    The fixture describes a two-node cluster with one node ready, pending and
    failed pods, a failed job, a PVC at 92.0, and a restarting pod; each domain
    module is then checked for the expected headline output.
    """
    cluster_metrics = {
        "nodes_total": 2,
        "nodes_ready": 1,
        "pods_running": 8,
        "pods_pending": 2,
        "pods_failed": 1,
        "job_failures_24h": [{"value": 1, "metric": {"job_name": "job"}}],
        "pvc_usage_top": [
            {"metric": {"namespace": "apps", "persistentvolumeclaim": "data"}, "value": 92.0},
        ],
        "top_restarts_1h": [{"metric": {"namespace": "apps", "pod": "api"}, "value": 3}],
    }
    node_summary = {
        "pressure_nodes": {"DiskPressure": ["titan-1"]},
        "unschedulable_nodes": ["titan-2"],
    }
    workload_health = {
        "deployments": {
            "not_ready": 1,
            "items": [{"namespace": "apps", "name": "api", "desired": 2, "ready": 1}],
        },
        "statefulsets": {"not_ready": 0, "items": []},
        "daemonsets": {"not_ready": 0, "items": []},
    }
    pod_issue_data = {
        "pending_over_15m": 2,
        "counts": {"Failed": 1},
        "waiting_reasons": {"CrashLoopBackOff": 3},
        "phase_reasons": {"Evicted": 1},
    }
    flux_kustomizations = {"not_ready": 1, "items": [{"name": "apps"}]}
    warning_events = {"warnings_total": 1, "warnings": [{"reason": "BackOff"}]}

    # Anomalies: the produced kinds must include at least these three.
    found_anomalies = anomalies._build_anomalies(
        cluster_metrics,
        node_summary,
        workload_health,
        flux_kustomizations,
        warning_events,
    )
    required_kinds = {"pods_pending", "pvc_pressure", "flux_not_ready"}
    seen_kinds = {entry["kind"] for entry in found_anomalies}
    assert required_kinds <= seen_kinds
    assert anomalies._severity_rank("critical") == 0
    pvc_signals = anomalies._pvc_pressure_signals(cluster_metrics)
    assert pvc_signals[0]["target"] == "apps/data"

    # Health summaries.
    health_lines = health._health_bullets(
        cluster_metrics,
        node_summary,
        workload_health,
        found_anomalies,
    )
    assert health_lines[0] == "Nodes ready: 1/2"
    not_ready_items = health._workload_not_ready_items(workload_health)
    assert not_ready_items[0]["name"] == "api"
    restarts_top = health._pod_restarts_top(cluster_metrics)
    assert restarts_top[0]["pod"] == "api"

    # Hand-written context rows shared by the signal, profile, and attention checks.
    node_ctx = [
        {
            "node": "titan-1",
            "ready": True,
            "hardware": "rpi5",
            "arch": "arm64",
            "roles": ["worker"],
            "cpu": 90.0,
            "ram": 85.0,
            "disk": 95.0,
            "net": 50.0,
            "io": 60.0,
            "load_index": 0.95,
            "baseline": {"net": {"max": 10.0}, "io": {"max": 20.0}},
            "baseline_delta": {"cpu": 100.0},
            "pressure_flags": ["DiskPressure"],
        },
    ]
    ns_ctx = [
        {
            "namespace": "apps",
            "pods_total": 4,
            "pods_running": 3,
            "primary_node": "titan-1",
            "nodes_top": [("titan-1", 4)],
            "cpu_usage": 2.0,
            "mem_usage": 4.0,
            "cpu_ratio": 1.5,
            "mem_ratio": 0.5,
            "baseline_delta": {"cpu": 100.0},
            "baseline": {"cpu": {"avg": 1.0}},
        },
    ]
    signal_ctx = SignalContext(
        cluster_metrics,
        node_ctx,
        ns_ctx,
        workload_health,
        pod_issue_data,
        flux_kustomizations,
    )
    issue_summary = signals._pod_issue_summary(pod_issue_data, cluster_metrics)
    assert issue_summary["waiting_reasons_top"][0]["reason"] == "CrashLoopBackOff"
    assert signals._build_signals(signal_ctx)

    # Profiles.
    pods_by_node = [
        {"node": "titan-1", "pods_total": 4, "pods_running": 3, "namespaces_top": [("apps", 4)]},
    ]
    workloads_by_node = {"titan-1": {"apps/api": 2}}
    workload_rows = [
        {
            "namespace": "apps",
            "workload": "api",
            "pods_total": 2,
            "pods_running": 1,
            "nodes": {"titan-1": 2},
        },
    ]
    profile_set = profiles._build_profiles(
        node_ctx,
        ns_ctx,
        pods_by_node,
        workload_rows,
        workloads_by_node,
    )
    assert profile_set["nodes"][0]["node"] == "titan-1"
    assert profile_set["namespaces"][0]["namespace"] == "apps"
    assert profile_set["workloads"][0]["workload"] == "api"

    # Attention ranking.
    attention_rows = attention._build_attention_ranked(
        cluster_metrics,
        node_ctx,
        pod_issue_data,
        workload_health,
    )
    assert attention_rows[0]["score"] > 0
    node_score = attention._node_attention_score(node_ctx[0])
    assert node_score[0] > 0
|