diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 34a108a..c08b493 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -46,6 +46,8 @@ PERCENT_THRESHOLDS = { ], } +NAMESPACE_CPU_WINDOW = "1m" + # --------------------------------------------------------------------------- # Cluster metadata # --------------------------------------------------------------------------- @@ -172,7 +174,7 @@ def node_io_expr(scope=""): def namespace_selector(scope_var): - return f'namespace!="",pod!="",container!="",{scope_var}' + return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}' def namespace_gpu_selector(scope_var): @@ -180,7 +182,10 @@ def namespace_gpu_selector(scope_var): def namespace_cpu_raw(scope_var): - return f"sum(rate(container_cpu_usage_seconds_total{{{namespace_selector(scope_var)}}}[5m])) by (namespace)" + return ( + "sum(rate(container_cpu_usage_seconds_total" + f"{{{namespace_selector(scope_var)}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)" + ) def namespace_ram_raw(scope_var): @@ -942,7 +947,7 @@ def build_overview(): namespace_cpu_share_expr(cpu_scope), {"h": 9, "w": 8, "x": 0, "y": 16}, links=namespace_scope_links("namespace_scope_cpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( @@ -952,7 +957,7 @@ def build_overview(): namespace_gpu_share_expr(gpu_scope), {"h": 9, "w": 8, "x": 8, "y": 16}, links=namespace_scope_links("namespace_scope_gpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( @@ -962,7 +967,7 @@ def build_overview(): namespace_ram_share_expr(ram_scope), {"h": 9, "w": 8, "x": 16, "y": 16}, links=namespace_scope_links("namespace_scope_ram"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) @@ -1783,7 +1788,7 @@ def build_gpu_dashboard(): namespace_gpu_share_expr(gpu_scope), {"h": 8, "w": 12, "x": 0, "y": 0}, links=namespace_scope_links("namespace_scope_gpu"), - description="Use panel links to switch namespace scope.", + description="Values are normalized within the selected scope; use panel links to switch scope.", ) ) panels.append( diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index d4ad913..2e71045 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -71,7 +71,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 2, diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index ce1b0a3..109988b 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1086,7 +1086,7 @@ }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1137,7 +1137,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 12, @@ -1206,7 +1206,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 13, @@ -1224,7 +1224,7 @@ }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1275,7 +1275,7 @@ "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 14, diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 41b4734..56965eb 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -80,7 +80,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 2, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 557d120..a13ec6f 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1095,7 +1095,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)", + "expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1146,7 +1146,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 12, @@ -1215,7 +1215,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 13, @@ -1233,7 +1233,7 @@ data: }, "targets": [ { - "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)", + "expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)", "refId": "A", "legendFormat": "{{namespace}}" } @@ -1284,7 +1284,7 @@ data: "targetBlank": false } ], - "description": "Use panel links to switch namespace scope." + "description": "Values are normalized within the selected scope; use panel links to switch scope." }, { "id": 14,