feature/atlas-monitoring #3

Merged
bstein merged 71 commits from feature/atlas-monitoring into main 2025-12-02 20:52:36 +00:00
3 changed files with 23 additions and 10 deletions
Showing only changes of commit be6052c47c - Show all commits

View File

@ -165,6 +165,14 @@ def node_io_expr(scope=""):
return scoped_node_expr(base, scope) return scoped_node_expr(base, scope)
def namespace_cpu_share_expr():
return f"({NAMESPACE_CPU_EXPR}) * on(namespace) group_left() ({NAMESPACE_COMBINED_FILTER})"
def namespace_ram_share_expr():
return f"({NAMESPACE_RAM_EXPR}) * on(namespace) group_left() ({NAMESPACE_COMBINED_FILTER})"
PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))' PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
CRASHLOOP_EXPR = ( CRASHLOOP_EXPR = (
'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason' 'sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason'
@ -199,12 +207,17 @@ STUCK_TABLE_EXPR = (
) )
NAMESPACE_CPU_EXPR = ( NAMESPACE_CPU_EXPR = (
'topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=""' 'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)'
',pod!=""}[5m])) by (namespace))'
) )
NAMESPACE_RAM_EXPR = ( NAMESPACE_RAM_EXPR = (
'topk(10, sum(container_memory_working_set_bytes{namespace!=""' 'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
',pod!=""}) by (namespace))' )
NAMESPACE_COMBINED_FILTER = (
'topk(10, ('
+ NAMESPACE_CPU_EXPR
+ ") + ("
+ NAMESPACE_RAM_EXPR
+ ' / 1e9))'
) )
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
NET_INGRESS_EXPR = ( NET_INGRESS_EXPR = (
@ -496,7 +509,7 @@ def build_overview():
pie_panel( pie_panel(
11, 11,
"Namespace CPU share", "Namespace CPU share",
NAMESPACE_CPU_EXPR, namespace_cpu_share_expr(),
{"h": 9, "w": 12, "x": 0, "y": 10}, {"h": 9, "w": 12, "x": 0, "y": 10},
) )
) )
@ -504,7 +517,7 @@ def build_overview():
pie_panel( pie_panel(
12, 12,
"Namespace RAM share", "Namespace RAM share",
NAMESPACE_RAM_EXPR, namespace_ram_share_expr(),
{"h": 9, "w": 12, "x": 12, "y": 10}, {"h": 9, "w": 12, "x": 12, "y": 10},
) )
) )

View File

@ -722,7 +722,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))", "expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}" "legendFormat": "{{namespace}}"
} }
@ -764,7 +764,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))", "expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}" "legendFormat": "{{namespace}}"
} }

View File

@ -731,7 +731,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) by (namespace))", "expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}" "legendFormat": "{{namespace}}"
} }
@ -773,7 +773,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) by (namespace))", "expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))",
"refId": "A", "refId": "A",
"legendFormat": "{{namespace}}" "legendFormat": "{{namespace}}"
} }