snapshot: add namespace usage stats
This commit is contained in:
parent
bbc36d57c4
commit
558f5c1270
@ -445,6 +445,21 @@ def _vm_vector(expr: str) -> list[dict[str, Any]]:
|
|||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_namespace_vector(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the entries that carry a usable, non-system namespace label.

    An entry is kept only when it is a dict whose ``"metric"`` mapping holds a
    non-empty string under ``"namespace"`` and that namespace is not listed in
    ``_SYSTEM_NAMESPACES``. Kept entries are returned unchanged and in their
    original order.
    """
    kept: list[dict[str, Any]] = []
    for sample in entries:
        if not isinstance(sample, dict):
            continue
        labels = sample.get("metric")
        if not isinstance(labels, dict):
            # A missing or malformed label map is treated as "no labels".
            labels = {}
        name = labels.get("namespace")
        if isinstance(name, str) and name and name not in _SYSTEM_NAMESPACES:
            kept.append(sample)
    return kept
|
||||||
|
|
||||||
|
|
||||||
def _vm_topk(expr: str, label_key: str) -> dict[str, Any] | None:
|
def _vm_topk(expr: str, label_key: str) -> dict[str, Any] | None:
|
||||||
result = _vm_vector(expr)
|
result = _vm_vector(expr)
|
||||||
if not result:
|
if not result:
|
||||||
@ -538,6 +553,24 @@ def _node_usage(errors: list[str]) -> dict[str, Any]:
|
|||||||
return usage
|
return usage
|
||||||
|
|
||||||
|
|
||||||
|
def _usage_stats(series: list[dict[str, Any]]) -> dict[str, float]:
|
||||||
|
values: list[float] = []
|
||||||
|
for entry in series:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
values.append(float(entry.get("value")))
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
continue
|
||||||
|
if not values:
|
||||||
|
return {}
|
||||||
|
return {
|
||||||
|
"min": min(values),
|
||||||
|
"max": max(values),
|
||||||
|
"avg": sum(values) / len(values),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
||||||
metrics: dict[str, Any] = {}
|
metrics: dict[str, Any] = {}
|
||||||
try:
|
try:
|
||||||
@ -557,12 +590,33 @@ def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
|||||||
metrics["postgres_connections"] = _postgres_connections(errors)
|
metrics["postgres_connections"] = _postgres_connections(errors)
|
||||||
metrics["hottest_nodes"] = _hottest_nodes(errors)
|
metrics["hottest_nodes"] = _hottest_nodes(errors)
|
||||||
metrics["node_usage"] = _node_usage(errors)
|
metrics["node_usage"] = _node_usage(errors)
|
||||||
|
metrics["node_usage_stats"] = {
|
||||||
|
"cpu": _usage_stats(metrics.get("node_usage", {}).get("cpu", [])),
|
||||||
|
"ram": _usage_stats(metrics.get("node_usage", {}).get("ram", [])),
|
||||||
|
"net": _usage_stats(metrics.get("node_usage", {}).get("net", [])),
|
||||||
|
"io": _usage_stats(metrics.get("node_usage", {}).get("io", [])),
|
||||||
|
}
|
||||||
|
try:
|
||||||
|
metrics["namespace_cpu_top"] = _filter_namespace_vector(
|
||||||
|
_vm_vector(
|
||||||
|
f'topk(5, sum by (namespace) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
metrics["namespace_mem_top"] = _filter_namespace_vector(
|
||||||
|
_vm_vector(
|
||||||
|
"topk(5, sum by (namespace) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
errors.append(f"namespace_usage: {exc}")
|
||||||
metrics["units"] = {
|
metrics["units"] = {
|
||||||
"cpu": "percent",
|
"cpu": "percent",
|
||||||
"ram": "percent",
|
"ram": "percent",
|
||||||
"net": "bytes_per_sec",
|
"net": "bytes_per_sec",
|
||||||
"io": "bytes_per_sec",
|
"io": "bytes_per_sec",
|
||||||
"restarts": "count",
|
"restarts": "count",
|
||||||
|
"namespace_cpu": "cores",
|
||||||
|
"namespace_mem": "bytes",
|
||||||
}
|
}
|
||||||
metrics["windows"] = {
|
metrics["windows"] = {
|
||||||
"rates": _RATE_WINDOW,
|
"rates": _RATE_WINDOW,
|
||||||
|
|||||||
@ -82,6 +82,9 @@ def test_collect_cluster_state(monkeypatch) -> None:
|
|||||||
assert snapshot["workloads"]
|
assert snapshot["workloads"]
|
||||||
assert snapshot["namespace_pods"]
|
assert snapshot["namespace_pods"]
|
||||||
assert snapshot["namespace_pods"][0]["namespace"] == "media"
|
assert snapshot["namespace_pods"][0]["namespace"] == "media"
|
||||||
|
assert "node_usage_stats" in snapshot["metrics"]
|
||||||
|
assert snapshot["metrics"]["namespace_cpu_top"] == []
|
||||||
|
assert snapshot["metrics"]["namespace_mem_top"] == []
|
||||||
assert summary.nodes_total == 2
|
assert summary.nodes_total == 2
|
||||||
assert summary.nodes_ready == 1
|
assert summary.nodes_ready == 1
|
||||||
assert summary.pods_running == 5.0
|
assert summary.pods_running == 5.0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user