From eb3991b6283bfb606f094778fec437f0daef6203 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sat, 15 Nov 2025 11:59:48 -0300
Subject: [PATCH] dashboards: improve public view and fix color

---
Review notes (text between '---' and the diffstat is not part of the commit):
- Failed pods (24h): kube_pod_status_phase is a 0/1 gauge, not a counter, so
  the query uses max_over_time() instead of increase().
- Node readiness: the "Not Ready" slice also counts nodes whose Ready
  condition is "unknown", not only "false".
- Leading whitespace on the diff lines below was reconstructed; apply with
  `git apply --ignore-whitespace` -- TODO confirm against the target files.

 .../monitoring/grafana-dashboard-public.yaml  | 115 +++++++++++++++---
 .../monitoring/grafana-dashboard-sre.yaml     |   2 +-
 2 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/services/monitoring/grafana-dashboard-public.yaml b/services/monitoring/grafana-dashboard-public.yaml
index db5d6c1..aee871f 100644
--- a/services/monitoring/grafana-dashboard-public.yaml
+++ b/services/monitoring/grafana-dashboard-public.yaml
@@ -177,31 +177,114 @@ data:
             "y": 7
           },
           "id": 3,
-          "options": {
-            "legend": {
-              "calcs": [],
-              "displayMode": "list",
-              "placement": "bottom",
-              "showLegend": true
-            },
-            "tooltip": {
-              "mode": "single",
-              "sort": "none"
-            }
-          },
           "targets": [
             {
               "datasource": {
                 "type": "prometheus",
                 "uid": "atlas-vm"
               },
-              "expr": "sum(rate(container_cpu_usage_seconds_total{namespace!=\"\", container!=\"\"}[5m])) by (namespace)",
+              "expr": "sum(kube_pod_status_phase{phase=\"Running\"}) by (namespace)",
               "legendFormat": "{{namespace}}",
               "refId": "A"
             }
           ],
-          "title": "Namespace CPU (5m avg)",
-          "type": "timeseries"
+          "title": "Running pods per namespace",
+          "type": "bargauge",
+          "options": {
+            "displayMode": "gradient",
+            "orientation": "horizontal",
+            "reduceOptions": {
+              "calcs": ["lastNotNull"],
+              "fields": "",
+              "values": false
+            },
+            "showUnfilled": false
+          }
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 15
+          },
+          "id": 4,
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "atlas-vm"
+              },
+              "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"})",
+              "legendFormat": "Ready",
+              "refId": "A"
+            },
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "atlas-vm"
+              },
+              "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=~\"false|unknown\"})",
+              "legendFormat": "Not Ready",
+              "refId": "B"
+            }
+          ],
+          "title": "Node readiness",
+          "type": "piechart",
+          "options": {
+            "legend": {
+              "displayMode": "table",
+              "placement": "right"
+            },
+            "pieType": "donut"
+          }
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "atlas-vm"
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 12,
+            "x": 0,
+            "y": 23
+          },
+          "id": 5,
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "atlas-vm"
+              },
+              "expr": "sum by (namespace) (max_over_time(kube_pod_status_phase{phase=\"Failed\"}[1d]))",
+              "legendFormat": "{{namespace}}",
+              "refId": "A"
+            }
+          ],
+          "title": "Failed pods (24h)",
+          "type": "table",
+          "fieldConfig": {
+            "defaults": {
+              "unit": "none",
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {"color": "green", "value": null},
+                  {"color": "red", "value": 1}
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "options": {
+            "showHeader": true
+          }
         }
       ],
       "refresh": "30s",
@@ -215,7 +298,7 @@ data:
         "list": []
       },
       "time": {
-        "from": "now-6h",
+        "from": "now-12h",
         "to": "now"
       },
       "timepicker": {},
diff --git a/services/monitoring/grafana-dashboard-sre.yaml b/services/monitoring/grafana-dashboard-sre.yaml
index 12995af..d146275 100644
--- a/services/monitoring/grafana-dashboard-sre.yaml
+++ b/services/monitoring/grafana-dashboard-sre.yaml
@@ -38,7 +38,7 @@ data:
           "fieldConfig": {
             "defaults": {
               "color": {
-                "mode": "continuous"
+                "mode": "continuous-RdYlGr"
               },
               "mappings": [],
               "max": 100,