atlasbot: add pod resource top metrics
This commit is contained in:
parent
2370aa4e5d
commit
281118b810
@@ -1073,6 +1073,16 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
metrics["top_restarts_1h"] = _vm_vector(
|
metrics["top_restarts_1h"] = _vm_vector(
|
||||||
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
||||||
)
|
)
|
||||||
|
metrics["pod_cpu_top"] = _filter_namespace_vector(
|
||||||
|
_vm_vector(
|
||||||
|
f'topk(5, sum by (namespace,pod) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
metrics["pod_mem_top"] = _filter_namespace_vector(
|
||||||
|
_vm_vector(
|
||||||
|
"topk(5, sum by (namespace,pod) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||||
|
)
|
||||||
|
)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
errors.append(f"vm: {exc}")
|
errors.append(f"vm: {exc}")
|
||||||
metrics["postgres_connections"] = _postgres_connections(errors)
|
metrics["postgres_connections"] = _postgres_connections(errors)
|
||||||
@@ -1106,6 +1116,8 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
"io": "bytes_per_sec",
|
"io": "bytes_per_sec",
|
||||||
"disk": "percent",
|
"disk": "percent",
|
||||||
"restarts": "count",
|
"restarts": "count",
|
||||||
|
"pod_cpu": "cores",
|
||||||
|
"pod_mem": "bytes",
|
||||||
"namespace_cpu": "cores",
|
"namespace_cpu": "cores",
|
||||||
"namespace_mem": "bytes",
|
"namespace_mem": "bytes",
|
||||||
"pvc_used_percent": "percent",
|
"pvc_used_percent": "percent",
|
||||||
|
|||||||
@@ -124,6 +124,8 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
|||||||
assert "node_usage_stats" in snapshot["metrics"]
|
assert "node_usage_stats" in snapshot["metrics"]
|
||||||
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
||||||
assert snapshot["metrics"]["namespace_mem_top"] == []
|
assert snapshot["metrics"]["namespace_mem_top"] == []
|
||||||
|
assert snapshot["metrics"]["pod_cpu_top"] == []
|
||||||
|
assert snapshot["metrics"]["pod_mem_top"] == []
|
||||||
assert snapshot["metrics"]["pvc_usage_top"] == []
|
assert snapshot["metrics"]["pvc_usage_top"] == []
|
||||||
assert summary.nodes_total == 2
|
assert summary.nodes_total == 2
|
||||||
assert summary.nodes_ready == 1
|
assert summary.nodes_ready == 1
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user