From 57ea397027478b09004f09ec72d1940f0675b4e4 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Sat, 13 Dec 2025 18:23:19 -0300 Subject: [PATCH] Use table format for namespace plurality panel --- scripts/dashboards_render_atlas.py | 29 ++++++++++++---- services/monitoring/dashboards/atlas-gpu.json | 8 +++-- .../monitoring/dashboards/atlas-network.json | 16 ++++++--- .../monitoring/dashboards/atlas-pods.json | 33 ++++++++++++++++--- .../monitoring/grafana-dashboard-gpu.yaml | 8 +++-- .../monitoring/grafana-dashboard-network.yaml | 16 ++++++--- .../monitoring/grafana-dashboard-pods.yaml | 33 ++++++++++++++++--- 7 files changed, 117 insertions(+), 26 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 1f1b86c..ed30848 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -529,6 +529,7 @@ def table_panel( options=None, filterable=True, footer=None, + format=None, ): """Return a Grafana table panel definition.""" # Optional PromQL subquery helpers in expr: share(), etc. @@ -538,13 +539,16 @@ def table_panel( if footer is not None: panel_options["footer"] = footer field_defaults = {"unit": unit, "custom": {"filterable": filterable}} + target = {"expr": expr, "refId": "A", **({"instant": True} if instant else {})} + if format: + target["format"] = format panel = { "id": panel_id, "type": "table", "title": title, "datasource": PROM_DS, "gridPos": grid, - "targets": [{"expr": expr, "refId": "A", **({"instant": True} if instant else {})}], + "targets": [target], "fieldConfig": {"defaults": field_defaults, "overrides": []}, "options": panel_options, } @@ -1190,29 +1194,42 @@ def build_pods_dashboard(): ) share_expr = ( - '(sum by (namespace,node) (kube_pod_info{pod!=""}) ' + '(sum by (namespace,node) (kube_pod_info{pod!="" , node!=""}) ' '/ on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=""}), 1) * 100)' ) + rank_expr = " + ".join( + f"(sum by (node) (kube_node_info{{node=\"{node}\"}}) * 1e-6 * {idx})" + for idx, node in enumerate(CONTROL_ALL + WORKER_NODES, start=1) + ) + score_expr = f"{share_expr} + on(node) group_left() ({rank_expr})" mask_expr = ( - f"{share_expr} == bool on(namespace) group_left() " - f"(max by (namespace) ({share_expr}))" + f"{score_expr} == bool on(namespace) group_left() " + f"(max by (namespace) ({score_expr}))" ) nonzero_expr = f"{share_expr} > bool 0" panels.append( table_panel( 10, - "Namespace Plurality by Node v22", + "Namespace Plurality by Node v23", ( f"{share_expr} * on(namespace,node) group_left() " f"({mask_expr}) * on(namespace,node) group_left() ({nonzero_expr})" ), {"h": 8, "w": 24, "x": 0, "y": 42}, unit="percent", - transformations=None, + transformations=[ + {"id": "labelsToFields", "options": {}}, + {"id": "organize", "options": {"excludeByName": {"Time": True}}}, + { + "id": "sortBy", + "options": {"fields": ["node", "Value"], "order": "asc"}, + }, + ], instant=True, options={"showColumnFilters": False}, filterable=False, footer={"show": False, "fields": "", "calcs": []}, + format="table", ) ) diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index 3d7a629..572c2c6 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -151,12 +151,16 @@ ], "fieldConfig": { "defaults": { - "unit": "percent" + "unit": "percent", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { diff --git a/services/monitoring/dashboards/atlas-network.json b/services/monitoring/dashboards/atlas-network.json index 9c2b54a..09e9383 100644 --- a/services/monitoring/dashboards/atlas-network.json +++ b/services/monitoring/dashboards/atlas-network.json @@ -519,12 +519,16 @@ ], "fieldConfig": { "defaults": { - "unit": "Bps" + "unit": "Bps", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { @@ -555,12 +559,16 @@ ], "fieldConfig": { "defaults": { - "unit": "Bps" + "unit": "Bps", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index b520314..f80dd4d 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -507,7 +507,7 @@ { "id": 10, "type": "table", - "title": "Namespace Plurality by Node v22", + "title": "Namespace Plurality by Node v23", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -520,9 +520,10 @@ }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100)))) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) > bool 0)", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) + (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) + (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) + (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) + (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) + (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) + (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) + (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) + (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) + (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) + (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) + (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) + (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) + (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) + (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) + (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) + (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) + (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) + (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) + (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) + (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) + (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) + (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) + (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) + (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) + (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) + (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) + (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) + (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) + (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) + (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) + (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) + (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) + (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) + (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) + (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) + (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) + (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) + (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) + (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) + (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) + (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) + (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22))))) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) > bool 0)", "refId": "A", - "instant": true + "instant": true, + "format": "table" } ], "fieldConfig": { @@ -543,7 +544,31 @@ "fields": "", "calcs": [] } - } + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + } + } + }, + { + "id": "sortBy", + "options": { + "fields": [ + "node", + "Value" + ], + "order": "asc" + } + } + ] } ], "time": { diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index aef9ae6..48725de 100644 --- a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -160,12 +160,16 @@ data: ], "fieldConfig": { "defaults": { - "unit": "percent" + "unit": "percent", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { diff --git a/services/monitoring/grafana-dashboard-network.yaml b/services/monitoring/grafana-dashboard-network.yaml index c679654..a87600f 100644 --- a/services/monitoring/grafana-dashboard-network.yaml +++ b/services/monitoring/grafana-dashboard-network.yaml @@ -528,12 +528,16 @@ data: ], "fieldConfig": { "defaults": { - "unit": "Bps" + "unit": "Bps", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { @@ -564,12 +568,16 @@ data: ], "fieldConfig": { "defaults": { - "unit": "Bps" + "unit": "Bps", + "custom": { + "filterable": true + } }, "overrides": [] }, "options": { - "showHeader": true + "showHeader": true, + "columnFilters": false }, "transformations": [ { diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 4b360bf..46dd9de 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -516,7 +516,7 @@ data: { "id": 10, "type": "table", - "title": "Namespace Plurality by Node v22", + "title": "Namespace Plurality by Node v23", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -529,9 +529,10 @@ data: }, "targets": [ { - "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100)))) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) > bool 0)", + "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) + (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) + (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) + (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) + (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) + (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) + (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) + (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) + (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) + (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) + (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) + (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) + (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) + (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) + (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) + (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) + (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) + (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) + (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) + (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) + (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) + (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) + (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) + (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) + (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) + (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) + (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) + (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) + (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) + (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) + (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) + (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) + (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) + (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) + (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) + (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) + (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) + (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) + (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) + (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) + (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) + (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) + (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22))))) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) > bool 0)", "refId": "A", - "instant": true + "instant": true, + "format": "table" } ], "fieldConfig": { @@ -552,7 +553,31 @@ data: "fields": "", "calcs": [] } - } + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + } + } + }, + { + "id": "sortBy", + "options": { + "fields": [ + "node", + "Value" + ], + "order": "asc" + } + } + ] } ], "time": {