diff --git a/scripts/render_dashboards.py b/scripts/render_dashboards.py index 3c0d6fa..a3ffb94 100644 --- a/scripts/render_dashboards.py +++ b/scripts/render_dashboards.py @@ -166,11 +166,17 @@ def node_io_expr(scope=""): def namespace_cpu_share_expr(): - return f"({NAMESPACE_CPU_EXPR}) * on(namespace) group_left() ({NAMESPACE_COMBINED_FILTER})" + return ( + f"(100 * ( {NAMESPACE_CPU_RAW} ) / sum( {NAMESPACE_CPU_RAW} )) * on(namespace) group_left() " + f"( {NAMESPACE_COMBINED_FILTER} )" + ) def namespace_ram_share_expr(): - return f"({NAMESPACE_RAM_EXPR}) * on(namespace) group_left() ({NAMESPACE_COMBINED_FILTER})" + return ( + f"(100 * ( {NAMESPACE_RAM_RAW} ) / sum( {NAMESPACE_RAM_RAW} )) * on(namespace) group_left() " + f"( {NAMESPACE_COMBINED_FILTER} )" + ) PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))' @@ -206,17 +212,17 @@ STUCK_TABLE_EXPR = ( ")" ) -NAMESPACE_CPU_EXPR = ( +NAMESPACE_CPU_RAW = ( 'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)' ) -NAMESPACE_RAM_EXPR = ( +NAMESPACE_RAM_RAW = ( 'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)' ) NAMESPACE_COMBINED_FILTER = ( 'topk(10, (' - + NAMESPACE_CPU_EXPR + + NAMESPACE_CPU_RAW + ") + (" - + NAMESPACE_RAM_EXPR + + NAMESPACE_RAM_RAW + ' / 1e9))' ) TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 7529ae8..572f439 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -722,7 +722,7 @@ }, "targets": [ { - "expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))", + "expr": "(100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )) * on(namespace) group_left() ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) )", "refId": "A", "legendFormat": "{{namespace}}" } @@ -764,7 +764,7 @@ }, "targets": [ { - "expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))", + "expr": "(100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )) * on(namespace) group_left() ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) )", "refId": "A", "legendFormat": "{{namespace}}" } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index ea3523c..77f73ef 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -731,7 +731,7 @@ data: }, "targets": [ { - "expr": "(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))", + "expr": "(100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )) * on(namespace) group_left() ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) )", "refId": "A", "legendFormat": "{{namespace}}" } @@ -773,7 +773,7 @@ data: }, "targets": [ { - "expr": "(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace)) * on(namespace) group_left() (topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)))", + "expr": "(100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )) * on(namespace) group_left() ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) )", "refId": "A", "legendFormat": "{{namespace}}" }