# services/monitoring/grafana-dashboard-nodes.yaml
#
# ConfigMap that ships the "Atlas Nodes" Grafana dashboard (uid "atlas-nodes")
# as a JSON document under the data key "atlas-nodes.json".
# NOTE(review): the grafana_dashboard: "1" label is presumably what a Grafana
# dashboard sidecar selects on - confirm against the Grafana deployment.
#
# Panel overview:
#   1-2  stat        node count / Ready nodes, taken from kube_node_info and
#                    kube_node_status_condition.
#   3-4  stat        CPU and RAM usage (percent), limited to
#                    titan-0a|titan-0b|titan-0c by multiplying with
#                    kube_node_info filtered on the node label.
#   5-7  timeseries  CPU, RAM and root-filesystem usage (percent) per node;
#                    panel 7 overrides its own time range via "timeFrom": "7d".
#
# All panels query the prometheus-type datasource with uid "atlas-vm".
# Panels 3-7 attach a Kubernetes node name to node_* series by copying the
# host part of the "instance" label into "internal_ip" (label_replace) and
# joining against kube_node_info on that label.
#
# The payload is a YAML literal block scalar, so it must remain strict JSON:
# no comments inside it, and every "expr" has to stay on one physical line
# because a JSON string may not contain a raw line break.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-nodes
  labels:
    grafana_dashboard: "1"
data:
  atlas-nodes.json: |
    {
      "uid": "atlas-nodes",
      "title": "Atlas Nodes",
      "folderUid": "atlas-nodes",
      "editable": true,
      "panels": [
        {
          "id": 1,
          "type": "stat",
          "title": "Node count",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 5,
            "w": 6,
            "x": 0,
            "y": 0
          },
          "targets": [
            {
              "expr": "count(kube_node_info)",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "rgba(115, 115, 115, 1)",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              },
              "unit": "none"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value"
          }
        },
        {
          "id": 2,
          "type": "stat",
          "title": "Ready nodes",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 5,
            "w": 6,
            "x": 6,
            "y": 0
          },
          "targets": [
            {
              "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "rgba(115, 115, 115, 1)",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              },
              "unit": "none"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value"
          }
        },
        {
          "id": 3,
          "type": "stat",
          "title": "Control plane CPU avg",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 5,
            "w": 6,
            "x": 12,
            "y": 0
          },
          "targets": [
            {
              "expr": "avg by (node) ((((1 - label_replace(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]), \"internal_ip\", \"$1\", \"instance\", \"([^:]+):.*\")) * 100) * on (internal_ip) group_left(node) kube_node_info)) * on(node) group_left() kube_node_info{node=~\"titan-0a|titan-0b|titan-0c\"}",
              "refId": "A",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "rgba(115, 115, 115, 1)",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              },
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value_and_name"
          }
        },
        {
          "id": 4,
          "type": "stat",
          "title": "Control plane RAM avg",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 5,
            "w": 6,
            "x": 18,
            "y": 0
          },
          "targets": [
            {
              "expr": "avg by (node) (((label_replace((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes, \"internal_ip\", \"$1\", \"instance\", \"([^:]+):.*\") * 100) * on (internal_ip) group_left(node) kube_node_info)) * on(node) group_left() kube_node_info{node=~\"titan-0a|titan-0b|titan-0c\"}",
              "refId": "A",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "palette-classic"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "rgba(115, 115, 115, 1)",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              },
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            },
            "textMode": "value_and_name"
          }
        },
        {
          "id": 5,
          "type": "timeseries",
          "title": "Node CPU",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
            "w": 24,
            "x": 0,
            "y": 5
          },
          "targets": [
            {
              "expr": "avg by (node) ((((1 - label_replace(rate(node_cpu_seconds_total{mode=\"idle\"}[5m]), \"internal_ip\", \"$1\", \"instance\", \"([^:]+):.*\")) * 100) * on (internal_ip) group_left(node) kube_node_info))",
              "refId": "A",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "right",
              "calcs": [
                "last"
              ]
            },
            "tooltip": {
              "mode": "multi"
            }
          }
        },
        {
          "id": 6,
          "type": "timeseries",
          "title": "Node RAM",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
            "w": 24,
            "x": 0,
            "y": 14
          },
          "targets": [
            {
              "expr": "avg by (node) (((label_replace((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes, \"internal_ip\", \"$1\", \"instance\", \"([^:]+):.*\") * 100) * on (internal_ip) group_left(node) kube_node_info))",
              "refId": "A",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "right",
              "calcs": [
                "last"
              ]
            },
            "tooltip": {
              "mode": "multi"
            }
          }
        },
        {
          "id": 7,
          "type": "timeseries",
          "title": "Root filesystem",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
            "w": 24,
            "x": 0,
            "y": 23
          },
          "targets": [
            {
              "expr": "avg by (node) (((1 - (label_replace(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"}, \"internal_ip\", \"$1\", \"instance\", \"([^:]+):.*\"))) * 100) * on (internal_ip) group_left(node) kube_node_info)",
              "refId": "A",
              "legendFormat": "{{node}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "right"
            },
            "tooltip": {
              "mode": "multi"
            }
          },
          "timeFrom": "7d"
        }
      ],
      "time": {
        "from": "now-12h",
        "to": "now"
      },
      "annotations": {
        "list": []
      },
      "schemaVersion": 39,
      "style": "dark",
      "tags": [
        "atlas",
        "nodes"
      ]
    }