monitoring: add gpu pie and tidy net panels

2025-11-18 00:11:39 -03:00 · 2025-11-18 00:11:39 -03:00 · 2ba642d49f
commit 2ba642d49f
parent beb3243839
3 changed files with 239 additions and 126 deletions
--- a/scripts/render_dashboards.py
+++ b/scripts/render_dashboards.py
@@ -167,12 +167,20 @@ def node_io_expr(scope=""):

 def namespace_cpu_share_expr():
    selected = f"( {NAMESPACE_CPU_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
-    return f"100 * ( {selected} ) / sum( {NAMESPACE_CPU_RAW} )"
+    total = f"clamp_min(sum( {NAMESPACE_CPU_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"


 def namespace_ram_share_expr():
    selected = f"( {NAMESPACE_RAM_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
-    return f"100 * ( {selected} ) / sum( {NAMESPACE_RAM_RAW} )"
+    total = f"clamp_min(sum( {NAMESPACE_RAM_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"
+
+
+def namespace_gpu_share_expr():
+    selected = f"( {NAMESPACE_GPU_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
+    total = f"clamp_min(sum( {NAMESPACE_GPU_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"


 PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
@@ -214,12 +222,17 @@ NAMESPACE_CPU_RAW = (
 NAMESPACE_RAM_RAW = (
    'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
 )
+NAMESPACE_GPU_RAW = (
+    'sum(kube_pod_container_resource_requests{namespace!="",resource="nvidia.com/gpu"}) by (namespace)'
+)
 NAMESPACE_COMBINED_FILTER = (
    'topk(10, ('
    + NAMESPACE_CPU_RAW
    + ") + ("
    + NAMESPACE_RAM_RAW
-    + ' / 1e9))'
+    + ' / 1e9) + ( '
+    + NAMESPACE_GPU_RAW
+    + ' * 10))'
 )
 TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
 NET_INGRESS_EXPR = (
@@ -512,22 +525,30 @@ def build_overview():
            11,
            "Namespace CPU share",
            namespace_cpu_share_expr(),
-            {"h": 9, "w": 12, "x": 0, "y": 10},
+            {"h": 9, "w": 8, "x": 0, "y": 10},
        )
    )
    panels.append(
        pie_panel(
            12,
+            "Namespace GPU share",
+            namespace_gpu_share_expr(),
+            {"h": 9, "w": 8, "x": 8, "y": 10},
+        )
+    )
+    panels.append(
+        pie_panel(
+            13,
            "Namespace RAM share",
            namespace_ram_share_expr(),
-            {"h": 9, "w": 12, "x": 12, "y": 10},
+            {"h": 9, "w": 8, "x": 16, "y": 10},
        )
    )

    worker_filter = f"{WORKER_REGEX}"
    panels.append(
        timeseries_panel(
-            13,
+            14,
            "Worker node CPU",
            node_cpu_expr(worker_filter),
            {"h": 8, "w": 12, "x": 0, "y": 19},
@@ -541,7 +562,7 @@ def build_overview():
    )
    panels.append(
        timeseries_panel(
-            14,
+            15,
            "Worker node RAM",
            node_mem_expr(worker_filter),
            {"h": 8, "w": 12, "x": 12, "y": 19},
@@ -556,7 +577,7 @@ def build_overview():

    panels.append(
        timeseries_panel(
-            15,
+            16,
            "Control plane CPU",
            node_cpu_expr(CONTROL_REGEX),
            {"h": 7, "w": 12, "x": 0, "y": 27},
@@ -568,7 +589,7 @@ def build_overview():
    )
    panels.append(
        timeseries_panel(
-            16,
+            17,
            "Control plane RAM",
            node_mem_expr(CONTROL_REGEX),
            {"h": 7, "w": 12, "x": 12, "y": 27},
@@ -581,11 +602,12 @@ def build_overview():

    panels.append(
        timeseries_panel(
-            17,
+            18,
            "Cluster ingress throughput",
            NET_INGRESS_EXPR,
            {"h": 7, "w": 12, "x": 0, "y": 34},
            unit="Bps",
+            legend="Ingress",
            legend_display="list",
            legend_placement="bottom",
            links=link_to("atlas-network"),
@@ -593,11 +615,12 @@ def build_overview():
    )
    panels.append(
        timeseries_panel(
-            18,
+            19,
            "Cluster egress throughput",
            NET_EGRESS_EXPR,
            {"h": 7, "w": 12, "x": 12, "y": 34},
            unit="Bps",
+            legend="Egress",
            legend_display="list",
            legend_placement="bottom",
            links=link_to("atlas-network"),
@@ -606,7 +629,7 @@ def build_overview():

    panels.append(
        timeseries_panel(
-            19,
+            20,
            "Root filesystem usage",
            root_usage_expr(),
            {"h": 8, "w": 12, "x": 0, "y": 41},
@@ -621,12 +644,12 @@ def build_overview():
    )
    panels.append(
        {
-            "id": 20,
+            "id": 21,
            "type": "bargauge",
            "title": "Nodes closest to full root disks",
            "datasource": PROM_DS,
            "gridPos": {"h": 8, "w": 12, "x": 12, "y": 41},
-            "targets": [{"expr": f"topk(8, {root_usage_expr()})", "refId": "A"}],
+            "targets": [{"expr": f"topk(8, {root_usage_expr()})", "refId": "A", "legendFormat": "{{node}}"}],
            "fieldConfig": {
                "defaults": {
                    "unit": "percent",
--- a/services/monitoring/dashboards/atlas-overview.json
+++ b/services/monitoring/dashboards/atlas-overview.json
@@ -716,13 +716,13 @@
      },
      "gridPos": {
        "h": 9,
-        "w": 12,
+        "w": 8,
        "x": 0,
        "y": 10
      },
      "targets": [
        {
-          "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )",
+          "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ), 1)",
          "refId": "A",
          "legendFormat": "{{namespace}}"
        }
@@ -751,20 +751,20 @@
    {
      "id": 12,
      "type": "piechart",
-      "title": "Namespace RAM share",
+      "title": "Namespace GPU share",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 9,
-        "w": 12,
-        "x": 12,
+        "w": 8,
+        "x": 8,
        "y": 10
      },
      "targets": [
        {
-          "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )",
+          "expr": "100 * ( ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ), 1)",
          "refId": "A",
          "legendFormat": "{{namespace}}"
        }
@@ -792,6 +792,48 @@
    },
    {
      "id": 13,
+      "type": "piechart",
+      "title": "Namespace RAM share",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "atlas-vm"
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 8,
+        "x": 16,
+        "y": 10
+      },
+      "targets": [
+        {
+          "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ), 1)",
+          "refId": "A",
+          "legendFormat": "{{namespace}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "right"
+        },
+        "pieType": "pie",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        }
+      }
+    },
+    {
+      "id": 14,
      "type": "timeseries",
      "title": "Worker node CPU",
      "datasource": {
@@ -838,7 +880,7 @@
      ]
    },
    {
-      "id": 14,
+      "id": 15,
      "type": "timeseries",
      "title": "Worker node RAM",
      "datasource": {
@@ -885,7 +927,7 @@
      ]
    },
    {
-      "id": 15,
+      "id": 16,
      "type": "timeseries",
      "title": "Control plane CPU",
      "datasource": {
@@ -922,7 +964,7 @@
      }
    },
    {
-      "id": 16,
+      "id": 17,
      "type": "timeseries",
      "title": "Control plane RAM",
      "datasource": {
@@ -959,7 +1001,7 @@
      }
    },
    {
-      "id": 17,
+      "id": 18,
      "type": "timeseries",
      "title": "Cluster ingress throughput",
      "datasource": {
@@ -975,50 +1017,8 @@
      "targets": [
        {
          "expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-          "refId": "A"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "Bps"
-        },
-        "overrides": []
-      },
-      "options": {
-        "legend": {
-          "displayMode": "list",
-          "placement": "bottom"
-        },
-        "tooltip": {
-          "mode": "multi"
-        }
-      },
-      "links": [
-        {
-          "title": "Open atlas-network dashboard",
-          "url": "/d/atlas-network",
-          "targetBlank": true
-        }
-      ]
-    },
-    {
-      "id": 18,
-      "type": "timeseries",
-      "title": "Cluster egress throughput",
-      "datasource": {
-        "type": "prometheus",
-        "uid": "atlas-vm"
-      },
-      "gridPos": {
-        "h": 7,
-        "w": 12,
-        "x": 12,
-        "y": 34
-      },
-      "targets": [
-        {
-          "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-          "refId": "A"
+          "refId": "A",
+          "legendFormat": "Ingress"
        }
      ],
      "fieldConfig": {
@@ -1047,6 +1047,50 @@
    {
      "id": 19,
      "type": "timeseries",
+      "title": "Cluster egress throughput",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "atlas-vm"
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 34
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
+          "refId": "A",
+          "legendFormat": "Egress"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "multi"
+        }
+      },
+      "links": [
+        {
+          "title": "Open atlas-network dashboard",
+          "url": "/d/atlas-network",
+          "targetBlank": true
+        }
+      ]
+    },
+    {
+      "id": 20,
+      "type": "timeseries",
      "title": "Root filesystem usage",
      "datasource": {
        "type": "prometheus",
@@ -1093,7 +1137,7 @@
      ]
    },
    {
-      "id": 20,
+      "id": 21,
      "type": "bargauge",
      "title": "Nodes closest to full root disks",
      "datasource": {
@@ -1109,7 +1153,8 @@
      "targets": [
        {
          "expr": "topk(8, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
-          "refId": "A"
+          "refId": "A",
+          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
--- a/services/monitoring/grafana-dashboard-overview.yaml
+++ b/services/monitoring/grafana-dashboard-overview.yaml
@@ -725,13 +725,13 @@ data:
          },
          "gridPos": {
            "h": 9,
-            "w": 12,
+            "w": 8,
            "x": 0,
            "y": 10
          },
          "targets": [
            {
-              "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )",
+              "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ), 1)",
              "refId": "A",
              "legendFormat": "{{namespace}}"
            }
@@ -760,20 +760,20 @@ data:
        {
          "id": 12,
          "type": "piechart",
-          "title": "Namespace RAM share",
+          "title": "Namespace GPU share",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
-            "w": 12,
-            "x": 12,
+            "w": 8,
+            "x": 8,
            "y": 10
          },
          "targets": [
            {
-              "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )",
+              "expr": "100 * ( ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ), 1)",
              "refId": "A",
              "legendFormat": "{{namespace}}"
            }
@@ -801,6 +801,48 @@ data:
        },
        {
          "id": 13,
+          "type": "piechart",
+          "title": "Namespace RAM share",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 8,
+            "x": 16,
+            "y": 10
+          },
+          "targets": [
+            {
+              "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ), 1)",
+              "refId": "A",
+              "legendFormat": "{{namespace}}"
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "unit": "percent"
+            },
+            "overrides": []
+          },
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "right"
+            },
+            "pieType": "pie",
+            "reduceOptions": {
+              "calcs": [
+                "lastNotNull"
+              ],
+              "fields": "",
+              "values": false
+            }
+          }
+        },
+        {
+          "id": 14,
          "type": "timeseries",
          "title": "Worker node CPU",
          "datasource": {
@@ -847,7 +889,7 @@ data:
          ]
        },
        {
-          "id": 14,
+          "id": 15,
          "type": "timeseries",
          "title": "Worker node RAM",
          "datasource": {
@@ -894,7 +936,7 @@ data:
          ]
        },
        {
-          "id": 15,
+          "id": 16,
          "type": "timeseries",
          "title": "Control plane CPU",
          "datasource": {
@@ -931,7 +973,7 @@ data:
          }
        },
        {
-          "id": 16,
+          "id": 17,
          "type": "timeseries",
          "title": "Control plane RAM",
          "datasource": {
@@ -968,7 +1010,7 @@ data:
          }
        },
        {
-          "id": 17,
+          "id": 18,
          "type": "timeseries",
          "title": "Cluster ingress throughput",
          "datasource": {
@@ -984,50 +1026,8 @@ data:
          "targets": [
            {
              "expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-              "refId": "A"
-            }
-          ],
-          "fieldConfig": {
-            "defaults": {
-              "unit": "Bps"
-            },
-            "overrides": []
-          },
-          "options": {
-            "legend": {
-              "displayMode": "list",
-              "placement": "bottom"
-            },
-            "tooltip": {
-              "mode": "multi"
-            }
-          },
-          "links": [
-            {
-              "title": "Open atlas-network dashboard",
-              "url": "/d/atlas-network",
-              "targetBlank": true
-            }
-          ]
-        },
-        {
-          "id": 18,
-          "type": "timeseries",
-          "title": "Cluster egress throughput",
-          "datasource": {
-            "type": "prometheus",
-            "uid": "atlas-vm"
-          },
-          "gridPos": {
-            "h": 7,
-            "w": 12,
-            "x": 12,
-            "y": 34
-          },
-          "targets": [
-            {
-              "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-              "refId": "A"
+              "refId": "A",
+              "legendFormat": "Ingress"
            }
          ],
          "fieldConfig": {
@@ -1056,6 +1056,50 @@ data:
        {
          "id": 19,
          "type": "timeseries",
+          "title": "Cluster egress throughput",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 7,
+            "w": 12,
+            "x": 12,
+            "y": 34
+          },
+          "targets": [
+            {
+              "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
+              "refId": "A",
+              "legendFormat": "Egress"
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "unit": "Bps"
+            },
+            "overrides": []
+          },
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "bottom"
+            },
+            "tooltip": {
+              "mode": "multi"
+            }
+          },
+          "links": [
+            {
+              "title": "Open atlas-network dashboard",
+              "url": "/d/atlas-network",
+              "targetBlank": true
+            }
+          ]
+        },
+        {
+          "id": 20,
+          "type": "timeseries",
          "title": "Root filesystem usage",
          "datasource": {
            "type": "prometheus",
@@ -1102,7 +1146,7 @@ data:
          ]
        },
        {
-          "id": 20,
+          "id": 21,
          "type": "bargauge",
          "title": "Nodes closest to full root disks",
          "datasource": {
@@ -1118,7 +1162,8 @@ data:
          "targets": [
            {
              "expr": "topk(8, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
-              "refId": "A"
+              "refId": "A",
+              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {