Compare commits

...

25 Commits

Author SHA1 Message Date
5cf80feb33 cluster_state: add delta and pod issue summaries 2026-02-03 22:48:16 -03:00
bdb94ffbe1 cluster: add lexicon and cross stats 2026-01-31 21:37:59 -03:00
a5b35848d0 cluster state: add issue hot spots and trends 2026-01-31 13:40:05 -03:00
8571ef6f4d cluster state: expand time-series summaries 2026-01-31 13:37:25 -03:00
69fd48d45b cluster: expand alerts and offenders snapshot 2026-01-30 21:52:10 -03:00
bd732883b1 chore: trigger build 2026-01-30 21:38:07 -03:00
7ee450228f cluster: enrich time-aware snapshot 2026-01-30 20:52:29 -03:00
4c011ca6f1 cluster_state: tidy signals input 2026-01-30 17:15:52 -03:00
37284ea7ac cluster_state: add topology and event summaries 2026-01-30 17:04:22 -03:00
0ad5f2afae cluster_state: add signals and profiles 2026-01-30 16:51:31 -03:00
8446c1f032 cluster-state: add baseline deltas and relationships 2026-01-30 10:20:35 -03:00
9276d2538a cluster-state: add baselines and attention ranking 2026-01-30 02:16:32 -03:00
d5086a0b98 cluster-state: refactor anomalies 2026-01-30 00:14:10 -03:00
56ea582c97 cluster-state: enrich snapshot substrate 2026-01-30 00:09:53 -03:00
3d21506ff0 cluster: summarize node load by hardware 2026-01-29 16:53:39 -03:00
ef2ede2443 cluster: include overcommitted namespaces 2026-01-29 14:59:17 -03:00
ccf89bc2e7 snapshot: add load and namespace summaries 2026-01-29 13:32:38 -03:00
fc2a482df1 snapshot: add node load and namespace capacity 2026-01-29 13:05:04 -03:00
29575baeb0 snapshot: add pod top metrics with node 2026-01-29 10:14:34 -03:00
d4a632d2e4 snapshot: refine longhorn health 2026-01-29 08:41:31 -03:00
a6062be60e snapshot: add longhorn volume summary 2026-01-29 07:43:37 -03:00
c9708a83ea fix(cluster-state): normalize hottest node label expr 2026-01-29 06:11:54 -03:00
bdb7cc4fcd feat(cluster-state): add pod issue reason counts 2026-01-29 05:59:09 -03:00
fbfa701d42 feat(cluster-state): add job and node age summaries 2026-01-29 05:54:00 -03:00
6bc8c4c84d fix(cluster-state): escape nodename in hottest queries 2026-01-29 05:36:44 -03:00
3 changed files with 2532 additions and 72 deletions

File diff suppressed because it is too large. Load Diff

View File

@@ -189,6 +189,7 @@ class Settings:
k8s_api_timeout_sec: float k8s_api_timeout_sec: float
vm_url: str vm_url: str
cluster_state_vm_timeout_sec: float cluster_state_vm_timeout_sec: float
alertmanager_url: str
mailu_sync_cron: str mailu_sync_cron: str
nextcloud_sync_cron: str nextcloud_sync_cron: str
@@ -469,6 +470,7 @@ class Settings:
"http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428", "http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428",
).rstrip("/"), ).rstrip("/"),
"cluster_state_vm_timeout_sec": _env_float("ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC", 5.0), "cluster_state_vm_timeout_sec": _env_float("ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC", 5.0),
"alertmanager_url": _env("ARIADNE_ALERTMANAGER_URL", "").rstrip("/"),
"cluster_state_cron": _env("ARIADNE_SCHEDULE_CLUSTER_STATE", "*/15 * * * *"), "cluster_state_cron": _env("ARIADNE_SCHEDULE_CLUSTER_STATE", "*/15 * * * *"),
"cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168), "cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168),
} }

View File

@@ -143,6 +143,19 @@ def test_collect_cluster_state(monkeypatch) -> None:
assert snapshot["metrics"]["pod_mem_top"] == [] assert snapshot["metrics"]["pod_mem_top"] == []
assert snapshot["metrics"]["job_failures_24h"] == [] assert snapshot["metrics"]["job_failures_24h"] == []
assert snapshot["metrics"]["pvc_usage_top"] == [] assert snapshot["metrics"]["pvc_usage_top"] == []
assert snapshot["summary"]["counts"]["nodes_total"] == 5.0
assert snapshot["summary"]["counts"]["nodes_ready"] == 5.0
assert snapshot["summary"]["counts"]["pods_running"] == 5.0
assert snapshot["summary"]["top"]["namespace_pods"][0]["namespace"] == "media"
assert snapshot["summary"]["baseline_window"]
assert "workload_not_ready" in snapshot["summary"]["top"]
assert "pod_restarts" in snapshot["summary"]["top"]
assert "attention_ranked" in snapshot["summary"]
assert snapshot["summary"]["health_bullets"]
assert snapshot["summary"]["unknowns"] == []
assert snapshot["context"]["nodes"]
assert snapshot["context"]["namespaces"]
assert "baseline" in snapshot["context"]["nodes"][0]
assert summary.nodes_total == 2 assert summary.nodes_total == 2
assert summary.nodes_ready == 1 assert summary.nodes_ready == 1
assert summary.pods_running == 5.0 assert summary.pods_running == 5.0