monitoring: unify namespace share panels

This commit is contained in:
Brad Stein 2025-11-17 21:57:40 -03:00
parent b59677615c
commit be6052c47c
3 changed files with 23 additions and 10 deletions

View File

@ -165,6 +165,14 @@ def node_io_expr(scope=""):
return scoped_node_expr(base, scope)
def namespace_cpu_share_expr():
    """Return the PromQL for the "Namespace CPU share" pie panel.

    The result is the per-namespace CPU series from NAMESPACE_CPU_EXPR,
    restricted to the namespaces selected by NAMESPACE_COMBINED_FILTER so
    that the CPU and RAM share panels list the same namespaces.
    """
    # `and on(namespace)` is a set intersection: it keeps the left-hand CPU
    # samples unchanged for every namespace present in the filter. The former
    # `* on(namespace) group_left()` multiplied each CPU value by the filter's
    # combined score, so the pie slices no longer reflected CPU usage.
    return f"({NAMESPACE_CPU_EXPR}) and on(namespace) ({NAMESPACE_COMBINED_FILTER})"
def namespace_ram_share_expr():
    """Return the PromQL for the "Namespace RAM share" pie panel.

    The result is the per-namespace memory series from NAMESPACE_RAM_EXPR,
    restricted to the namespaces selected by NAMESPACE_COMBINED_FILTER so
    that the RAM and CPU share panels list the same namespaces.
    """
    # `and on(namespace)` is a set intersection: it keeps the left-hand memory
    # samples unchanged for every namespace present in the filter. The former
    # `* on(namespace) group_left()` multiplied each byte count by the filter's
    # combined score, so the pie slices no longer reflected memory usage.
    return f"({NAMESPACE_RAM_EXPR}) and on(namespace) ({NAMESPACE_COMBINED_FILTER})"
# Total of pods whose phase is neither Running nor Succeeded. The inner
# `max by (namespace,pod)` collapses the per-phase series to one value per pod
# and the outer `sum` adds them up (a pod count, assuming
# kube_pod_status_phase is a 0/1 gauge per phase -- TODO confirm).
PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
CRASHLOOP_EXPR = (
'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
@ -199,12 +207,17 @@ STUCK_TABLE_EXPR = (
)
# CPU usage per namespace: 5-minute rate of container_cpu_usage_seconds_total,
# summed by namespace. Series with an empty namespace or pod label are excluded.
NAMESPACE_CPU_EXPR = (
'topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=""'
',pod!=""}[5m])) by (namespace))'
'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)'
)
# Memory usage per namespace: container_memory_working_set_bytes summed by
# namespace. Series with an empty namespace or pod label are excluded.
NAMESPACE_RAM_EXPR = (
'topk(10, sum(container_memory_working_set_bytes{namespace!=""'
',pod!=""}) by (namespace))'
'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
)
# Selects the ten namespaces with the highest combined score, where the score
# is the NAMESPACE_CPU_EXPR value plus the NAMESPACE_RAM_EXPR value divided by
# 1e9. Both namespace share expressions join against this single selection.
NAMESPACE_COMBINED_FILTER = (
    f"topk(10, ({NAMESPACE_CPU_EXPR}) + ({NAMESPACE_RAM_EXPR} / 1e9))"
)
# Per-router request rate: 5-minute rate of traefik_router_requests_total,
# summed by the `router` label.
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
NET_INGRESS_EXPR = (
@ -496,7 +509,7 @@ def build_overview():
pie_panel(
11,
"Namespace CPU share",
NAMESPACE_CPU_EXPR,
namespace_cpu_share_expr(),
{"h": 9, "w": 12, "x": 0, "y": 10},
)
)
@ -504,7 +517,7 @@ def build_overview():
pie_panel(
12,
"Namespace RAM share",
NAMESPACE_RAM_EXPR,
namespace_ram_share_expr(),
{"h": 9, "w": 12, "x": 12, "y": 10},
)
)

View File

@ -722,7 +722,7 @@
},
"targets": [
{
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
"expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -764,7 +764,7 @@
},
"targets": [
{
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
"expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A",
"legendFormat": "{{namespace}}"
}

View File

@ -731,7 +731,7 @@ data:
},
"targets": [
{
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))",
"expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A",
"legendFormat": "{{namespace}}"
}
@ -773,7 +773,7 @@ data:
},
"targets": [
{
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))",
"expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A",
"legendFormat": "{{namespace}}"
}