atlasbot: add pod resource top metrics
This commit is contained in:
parent
2370aa4e5d
commit
281118b810
@@ -1073,6 +1073,16 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
||||
metrics["top_restarts_1h"] = _vm_vector(
|
||||
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
||||
)
|
||||
metrics["pod_cpu_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f'topk(5, sum by (namespace,pod) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
||||
)
|
||||
)
|
||||
metrics["pod_mem_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (namespace,pod) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
errors.append(f"vm: {exc}")
|
||||
metrics["postgres_connections"] = _postgres_connections(errors)
|
||||
@@ -1106,6 +1116,8 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
||||
"io": "bytes_per_sec",
|
||||
"disk": "percent",
|
||||
"restarts": "count",
|
||||
"pod_cpu": "cores",
|
||||
"pod_mem": "bytes",
|
||||
"namespace_cpu": "cores",
|
||||
"namespace_mem": "bytes",
|
||||
"pvc_used_percent": "percent",
|
||||
|
||||
@@ -124,6 +124,8 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
||||
assert "node_usage_stats" in snapshot["metrics"]
|
||||
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
||||
assert snapshot["metrics"]["namespace_mem_top"] == []
|
||||
assert snapshot["metrics"]["pod_cpu_top"] == []
|
||||
assert snapshot["metrics"]["pod_mem_top"] == []
|
||||
assert snapshot["metrics"]["pvc_usage_top"] == []
|
||||
assert summary.nodes_total == 2
|
||||
assert summary.nodes_ready == 1
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user