From 2ba642d49f92b20057ab914687830c3d6edf449c Mon Sep 17 00:00:00 2001
From: Brad Stein <Brad.Stein@gmail.com>
Date: Tue, 18 Nov 2025 00:11:39 -0300
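Subject: [PATCH] monitoring: add GPU share pie and label network panels

Add a "Namespace GPU share" pie to the overview dashboard, computed from
kube_pod_container_resource_requests{resource="nvidia.com/gpu"}, and
narrow the CPU and RAM pies so the three sit side by side. The top-10
namespace filter now also adds GPU requests (weighted x10), and the
share denominators are wrapped in clamp_min(..., 1).

Give the cluster ingress/egress throughput series fixed "Ingress" and
"Egress" legends, label the root-disk bar gauge by {{node}}, and
renumber the panels that follow the new pie. Regenerate the rendered
JSON and YAML dashboards to match.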

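---
NOTE(review): PromQL's vector `+` keeps only label sets present on both
sides, so adding NAMESPACE_GPU_RAW to NAMESPACE_COMBINED_FILTER looks
like it drops every namespace without nvidia.com/gpu requests from the
CPU and RAM pies too; a zero fallback via `or on(namespace)` on the GPU
term would avoid that. Also, clamp_min(sum(...), 1) understates CPU
shares whenever total usage is below one core. Please confirm both
against the live datasource before merging.
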
 scripts/render_dashboards.py                  |  51 ++++--
 .../monitoring/dashboards/atlas-overview.json | 157 +++++++++++-------
 .../grafana-dashboard-overview.yaml           | 157 +++++++++++-------
 3 files changed, 239 insertions(+), 126 deletions(-)

diff --git a/scripts/render_dashboards.py b/scripts/render_dashboards.py
index 4e8e5a5..c194771 100644
--- a/scripts/render_dashboards.py
+++ b/scripts/render_dashboards.py
@@ -167,12 +167,20 @@ def node_io_expr(scope=""):
 
 def namespace_cpu_share_expr():
     selected = f"( {NAMESPACE_CPU_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
-    return f"100 * ( {selected} ) / sum( {NAMESPACE_CPU_RAW} )"
+    total = f"clamp_min(sum( {NAMESPACE_CPU_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"
 
 
 def namespace_ram_share_expr():
     selected = f"( {NAMESPACE_RAM_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
-    return f"100 * ( {selected} ) / sum( {NAMESPACE_RAM_RAW} )"
+    total = f"clamp_min(sum( {NAMESPACE_RAM_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"
+
+
+def namespace_gpu_share_expr():
+    selected = f"( {NAMESPACE_GPU_RAW} ) and on(namespace) ( {NAMESPACE_COMBINED_FILTER} )"
+    total = f"clamp_min(sum( {NAMESPACE_GPU_RAW} ), 1)"
+    return f"100 * ( {selected} ) / {total}"
 
 
 PROBLEM_PODS_EXPR = 'sum(max by (namespace,pod) (kube_pod_status_phase{phase!~"Running|Succeeded"}))'
@@ -214,12 +222,17 @@ NAMESPACE_CPU_RAW = (
 NAMESPACE_RAM_RAW = (
     'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
 )
+NAMESPACE_GPU_RAW = (
+    'sum(kube_pod_container_resource_requests{namespace!="",resource="nvidia.com/gpu"}) by (namespace)'
+)
 NAMESPACE_COMBINED_FILTER = (
     'topk(10, ('
     + NAMESPACE_CPU_RAW
     + ") + ("
     + NAMESPACE_RAM_RAW
-    + ' / 1e9))'
+    + ' / 1e9) + ( '
+    + NAMESPACE_GPU_RAW
+    + ' * 10))'
 )
 TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
 NET_INGRESS_EXPR = (
@@ -512,22 +525,30 @@ def build_overview():
             11,
             "Namespace CPU share",
             namespace_cpu_share_expr(),
-            {"h": 9, "w": 12, "x": 0, "y": 10},
+            {"h": 9, "w": 8, "x": 0, "y": 10},
         )
     )
     panels.append(
         pie_panel(
             12,
+            "Namespace GPU share",
+            namespace_gpu_share_expr(),
+            {"h": 9, "w": 8, "x": 8, "y": 10},
+        )
+    )
+    panels.append(
+        pie_panel(
+            13,
             "Namespace RAM share",
             namespace_ram_share_expr(),
-            {"h": 9, "w": 12, "x": 12, "y": 10},
+            {"h": 9, "w": 8, "x": 16, "y": 10},
         )
     )
 
     worker_filter = f"{WORKER_REGEX}"
     panels.append(
         timeseries_panel(
-            13,
+            14,
             "Worker node CPU",
             node_cpu_expr(worker_filter),
             {"h": 8, "w": 12, "x": 0, "y": 19},
@@ -541,7 +562,7 @@ def build_overview():
     )
     panels.append(
         timeseries_panel(
-            14,
+            15,
             "Worker node RAM",
             node_mem_expr(worker_filter),
             {"h": 8, "w": 12, "x": 12, "y": 19},
@@ -556,7 +577,7 @@ def build_overview():
 
     panels.append(
         timeseries_panel(
-            15,
+            16,
             "Control plane CPU",
             node_cpu_expr(CONTROL_REGEX),
             {"h": 7, "w": 12, "x": 0, "y": 27},
@@ -568,7 +589,7 @@ def build_overview():
     )
     panels.append(
         timeseries_panel(
-            16,
+            17,
             "Control plane RAM",
             node_mem_expr(CONTROL_REGEX),
             {"h": 7, "w": 12, "x": 12, "y": 27},
@@ -581,11 +602,12 @@ def build_overview():
 
     panels.append(
         timeseries_panel(
-            17,
+            18,
             "Cluster ingress throughput",
             NET_INGRESS_EXPR,
             {"h": 7, "w": 12, "x": 0, "y": 34},
             unit="Bps",
+            legend="Ingress",
             legend_display="list",
             legend_placement="bottom",
             links=link_to("atlas-network"),
@@ -593,11 +615,12 @@ def build_overview():
     )
     panels.append(
         timeseries_panel(
-            18,
+            19,
             "Cluster egress throughput",
             NET_EGRESS_EXPR,
             {"h": 7, "w": 12, "x": 12, "y": 34},
             unit="Bps",
+            legend="Egress",
             legend_display="list",
             legend_placement="bottom",
             links=link_to("atlas-network"),
@@ -606,7 +629,7 @@ def build_overview():
 
     panels.append(
         timeseries_panel(
-            19,
+            20,
             "Root filesystem usage",
             root_usage_expr(),
             {"h": 8, "w": 12, "x": 0, "y": 41},
@@ -621,12 +644,12 @@ def build_overview():
     )
     panels.append(
         {
-            "id": 20,
+            "id": 21,
             "type": "bargauge",
             "title": "Nodes closest to full root disks",
             "datasource": PROM_DS,
             "gridPos": {"h": 8, "w": 12, "x": 12, "y": 41},
-            "targets": [{"expr": f"topk(8, {root_usage_expr()})", "refId": "A"}],
+            "targets": [{"expr": f"topk(8, {root_usage_expr()})", "refId": "A", "legendFormat": "{{node}}"}],
             "fieldConfig": {
                 "defaults": {
                     "unit": "percent",
diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json
index 55c1909..0b2f69f 100644
--- a/services/monitoring/dashboards/atlas-overview.json
+++ b/services/monitoring/dashboards/atlas-overview.json
@@ -716,13 +716,13 @@
       },
       "gridPos": {
         "h": 9,
-        "w": 12,
+        "w": 8,
         "x": 0,
         "y": 10
       },
       "targets": [
         {
-          "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )",
+          "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ), 1)",
           "refId": "A",
           "legendFormat": "{{namespace}}"
         }
@@ -751,20 +751,20 @@
     {
       "id": 12,
       "type": "piechart",
-      "title": "Namespace RAM share",
+      "title": "Namespace GPU share",
       "datasource": {
         "type": "prometheus",
         "uid": "atlas-vm"
       },
       "gridPos": {
         "h": 9,
-        "w": 12,
-        "x": 12,
+        "w": 8,
+        "x": 8,
         "y": 10
       },
       "targets": [
         {
-          "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )",
+          "expr": "100 * ( ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ), 1)",
           "refId": "A",
           "legendFormat": "{{namespace}}"
         }
@@ -792,6 +792,48 @@
     },
     {
       "id": 13,
+      "type": "piechart",
+      "title": "Namespace RAM share",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "atlas-vm"
+      },
+      "gridPos": {
+        "h": 9,
+        "w": 8,
+        "x": 16,
+        "y": 10
+      },
+      "targets": [
+        {
+          "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ), 1)",
+          "refId": "A",
+          "legendFormat": "{{namespace}}"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "percent"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "right"
+        },
+        "pieType": "pie",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        }
+      }
+    },
+    {
+      "id": 14,
       "type": "timeseries",
       "title": "Worker node CPU",
       "datasource": {
@@ -838,7 +880,7 @@
       ]
     },
     {
-      "id": 14,
+      "id": 15,
       "type": "timeseries",
       "title": "Worker node RAM",
       "datasource": {
@@ -885,7 +927,7 @@
       ]
     },
     {
-      "id": 15,
+      "id": 16,
       "type": "timeseries",
       "title": "Control plane CPU",
       "datasource": {
@@ -922,7 +964,7 @@
       }
     },
     {
-      "id": 16,
+      "id": 17,
       "type": "timeseries",
       "title": "Control plane RAM",
       "datasource": {
@@ -959,7 +1001,7 @@
       }
     },
     {
-      "id": 17,
+      "id": 18,
       "type": "timeseries",
       "title": "Cluster ingress throughput",
       "datasource": {
@@ -975,50 +1017,8 @@
       "targets": [
         {
           "expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-          "refId": "A"
-        }
-      ],
-      "fieldConfig": {
-        "defaults": {
-          "unit": "Bps"
-        },
-        "overrides": []
-      },
-      "options": {
-        "legend": {
-          "displayMode": "list",
-          "placement": "bottom"
-        },
-        "tooltip": {
-          "mode": "multi"
-        }
-      },
-      "links": [
-        {
-          "title": "Open atlas-network dashboard",
-          "url": "/d/atlas-network",
-          "targetBlank": true
-        }
-      ]
-    },
-    {
-      "id": 18,
-      "type": "timeseries",
-      "title": "Cluster egress throughput",
-      "datasource": {
-        "type": "prometheus",
-        "uid": "atlas-vm"
-      },
-      "gridPos": {
-        "h": 7,
-        "w": 12,
-        "x": 12,
-        "y": 34
-      },
-      "targets": [
-        {
-          "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-          "refId": "A"
+          "refId": "A",
+          "legendFormat": "Ingress"
         }
       ],
       "fieldConfig": {
@@ -1047,6 +1047,50 @@
     {
       "id": 19,
       "type": "timeseries",
+      "title": "Cluster egress throughput",
+      "datasource": {
+        "type": "prometheus",
+        "uid": "atlas-vm"
+      },
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 34
+      },
+      "targets": [
+        {
+          "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
+          "refId": "A",
+          "legendFormat": "Egress"
+        }
+      ],
+      "fieldConfig": {
+        "defaults": {
+          "unit": "Bps"
+        },
+        "overrides": []
+      },
+      "options": {
+        "legend": {
+          "displayMode": "list",
+          "placement": "bottom"
+        },
+        "tooltip": {
+          "mode": "multi"
+        }
+      },
+      "links": [
+        {
+          "title": "Open atlas-network dashboard",
+          "url": "/d/atlas-network",
+          "targetBlank": true
+        }
+      ]
+    },
+    {
+      "id": 20,
+      "type": "timeseries",
       "title": "Root filesystem usage",
       "datasource": {
         "type": "prometheus",
@@ -1093,7 +1137,7 @@
       ]
     },
     {
-      "id": 20,
+      "id": 21,
       "type": "bargauge",
       "title": "Nodes closest to full root disks",
       "datasource": {
@@ -1109,7 +1153,8 @@
       "targets": [
         {
           "expr": "topk(8, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
-          "refId": "A"
+          "refId": "A",
+          "legendFormat": "{{node}}"
         }
       ],
       "fieldConfig": {
diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml
index deeeacc..0ac79db 100644
--- a/services/monitoring/grafana-dashboard-overview.yaml
+++ b/services/monitoring/grafana-dashboard-overview.yaml
@@ -725,13 +725,13 @@ data:
           },
           "gridPos": {
             "h": 9,
-            "w": 12,
+            "w": 8,
             "x": 0,
             "y": 10
           },
           "targets": [
             {
-              "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) )",
+              "expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ), 1)",
               "refId": "A",
               "legendFormat": "{{namespace}}"
             }
@@ -760,20 +760,20 @@ data:
         {
           "id": 12,
           "type": "piechart",
-          "title": "Namespace RAM share",
+          "title": "Namespace GPU share",
           "datasource": {
             "type": "prometheus",
             "uid": "atlas-vm"
           },
           "gridPos": {
             "h": 9,
-            "w": 12,
-            "x": 12,
+            "w": 8,
+            "x": 8,
             "y": 10
           },
           "targets": [
             {
-              "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9)) ) ) / sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) )",
+              "expr": "100 * ( ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) ), 1)",
               "refId": "A",
               "legendFormat": "{{namespace}}"
             }
@@ -801,6 +801,48 @@ data:
         },
         {
           "id": 13,
+          "type": "piechart",
+          "title": "Namespace RAM share",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 8,
+            "x": 16,
+            "y": 10
+          },
+          "targets": [
+            {
+              "expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( topk(10, (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace)) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ( sum(kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"}) by (namespace) * 10)) ) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ), 1)",
+              "refId": "A",
+              "legendFormat": "{{namespace}}"
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "unit": "percent"
+            },
+            "overrides": []
+          },
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "right"
+            },
+            "pieType": "pie",
+            "reduceOptions": {
+              "calcs": [
+                "lastNotNull"
+              ],
+              "fields": "",
+              "values": false
+            }
+          }
+        },
+        {
+          "id": 14,
           "type": "timeseries",
           "title": "Worker node CPU",
           "datasource": {
@@ -847,7 +889,7 @@ data:
           ]
         },
         {
-          "id": 14,
+          "id": 15,
           "type": "timeseries",
           "title": "Worker node RAM",
           "datasource": {
@@ -894,7 +936,7 @@ data:
           ]
         },
         {
-          "id": 15,
+          "id": 16,
           "type": "timeseries",
           "title": "Control plane CPU",
           "datasource": {
@@ -931,7 +973,7 @@ data:
           }
         },
         {
-          "id": 16,
+          "id": 17,
           "type": "timeseries",
           "title": "Control plane RAM",
           "datasource": {
@@ -968,7 +1010,7 @@ data:
           }
         },
         {
-          "id": 17,
+          "id": 18,
           "type": "timeseries",
           "title": "Cluster ingress throughput",
           "datasource": {
@@ -984,50 +1026,8 @@ data:
           "targets": [
             {
               "expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-              "refId": "A"
-            }
-          ],
-          "fieldConfig": {
-            "defaults": {
-              "unit": "Bps"
-            },
-            "overrides": []
-          },
-          "options": {
-            "legend": {
-              "displayMode": "list",
-              "placement": "bottom"
-            },
-            "tooltip": {
-              "mode": "multi"
-            }
-          },
-          "links": [
-            {
-              "title": "Open atlas-network dashboard",
-              "url": "/d/atlas-network",
-              "targetBlank": true
-            }
-          ]
-        },
-        {
-          "id": 18,
-          "type": "timeseries",
-          "title": "Cluster egress throughput",
-          "datasource": {
-            "type": "prometheus",
-            "uid": "atlas-vm"
-          },
-          "gridPos": {
-            "h": 7,
-            "w": 12,
-            "x": 12,
-            "y": 34
-          },
-          "targets": [
-            {
-              "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
-              "refId": "A"
+              "refId": "A",
+              "legendFormat": "Ingress"
             }
           ],
           "fieldConfig": {
@@ -1056,6 +1056,50 @@ data:
         {
           "id": 19,
           "type": "timeseries",
+          "title": "Cluster egress throughput",
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 7,
+            "w": 12,
+            "x": 12,
+            "y": 34
+          },
+          "targets": [
+            {
+              "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
+              "refId": "A",
+              "legendFormat": "Egress"
+            }
+          ],
+          "fieldConfig": {
+            "defaults": {
+              "unit": "Bps"
+            },
+            "overrides": []
+          },
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "bottom"
+            },
+            "tooltip": {
+              "mode": "multi"
+            }
+          },
+          "links": [
+            {
+              "title": "Open atlas-network dashboard",
+              "url": "/d/atlas-network",
+              "targetBlank": true
+            }
+          ]
+        },
+        {
+          "id": 20,
+          "type": "timeseries",
           "title": "Root filesystem usage",
           "datasource": {
             "type": "prometheus",
@@ -1102,7 +1146,7 @@ data:
           ]
         },
         {
-          "id": 20,
+          "id": 21,
           "type": "bargauge",
           "title": "Nodes closest to full root disks",
           "datasource": {
@@ -1118,7 +1162,8 @@ data:
           "targets": [
             {
               "expr": "topk(8, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
-              "refId": "A"
+              "refId": "A",
+              "legendFormat": "{{node}}"
             }
           ],
           "fieldConfig": {