# services/monitoring/grafana-dashboard-public.yaml
#
# ConfigMap that carries the "Atlas Public Overview" Grafana dashboard
# (uid: atlas-public) as one JSON document under the key
# atlas-public-overview.json.
#
# NOTE(review): the grafana_dashboard label is presumably what the Grafana
# dashboard sidecar selects on -- confirm against the Grafana deployment.
#
# The JSON lives in a literal block scalar, so it cannot hold comments.
# Panel notes are therefore kept here:
#   - Every metric panel queries the prometheus-type datasource with
#     uid "atlas-vm".
#   - Panels 1-4 (stat) use color.mode "thresholds" so that the threshold
#     steps defined on each panel actually colour the displayed value.
#   - Panel 7 computes per-node CPU usage as (1 - average idle rate) * 100.
#     Averaging the non-idle series directly would average across CPU
#     modes as well as cores and under-report usage.
#   - Panel 9 maps 0/1 to Down/Up using the {"type": "value", "options"}
#     value-mapping shape that matches the declared schemaVersion.
#   - Panel 10 counts, per namespace, the pods seen in phase "Failed"
#     during the last 24h. kube_pod_status_phase is a 0/1 gauge, so
#     max_over_time() is used rather than the counter-only increase().
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-public
  labels:
    grafana_dashboard: "1"
data:
  atlas-public-overview.json: |
    {
      "annotations": {
        "list": [
          {
            "builtIn": 1,
            "datasource": {
              "type": "datasource",
              "uid": "grafana"
            },
            "enable": true,
            "hide": true,
            "iconColor": "rgba(0, 211, 255, 1)",
            "name": "Annotations & Alerts",
            "type": "dashboard"
          }
        ]
      },
      "editable": false,
      "folderUid": "atlas-public",
      "graphTooltip": 0,
      "links": [],
      "panels": [
        {
          "id": 1,
          "type": "stat",
          "title": "Running pods",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 6,
            "w": 6,
            "x": 0,
            "y": 0
          },
          "targets": [
            {
              "expr": "sum(kube_pod_status_phase{phase=\"Running\"})",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "rgba(115, 115, 115, 1)",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              },
              "unit": "none"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 2,
          "type": "stat",
          "title": "Ready node percentage",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 6,
            "w": 6,
            "x": 6,
            "y": 0
          },
          "targets": [
            {
              "expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\"}) / sum(kube_node_info) * 100",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "percentage",
                "steps": [
                  {
                    "color": "red",
                    "value": null
                  },
                  {
                    "color": "orange",
                    "value": 90
                  },
                  {
                    "color": "green",
                    "value": 98
                  }
                ]
              },
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 3,
          "type": "stat",
          "title": "Cluster CPU saturation",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 6,
            "w": 6,
            "x": 12,
            "y": 0
          },
          "targets": [
            {
              "expr": "avg((1 - rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "percentage",
                "steps": [
                  {
                    "color": "green",
                    "value": null
                  },
                  {
                    "color": "yellow",
                    "value": 65
                  },
                  {
                    "color": "red",
                    "value": 85
                  }
                ]
              },
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 4,
          "type": "stat",
          "title": "Cluster memory usage",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 6,
            "w": 6,
            "x": 18,
            "y": 0
          },
          "targets": [
            {
              "expr": "100 - (sum(node_memory_MemAvailable_bytes) / sum(node_memory_MemTotal_bytes) * 100)",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "color": {
                "mode": "thresholds"
              },
              "mappings": [],
              "thresholds": {
                "mode": "percentage",
                "steps": [
                  {
                    "color": "green",
                    "value": null
                  },
                  {
                    "color": "yellow",
                    "value": 70
                  },
                  {
                    "color": "red",
                    "value": 85
                  }
                ]
              },
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "colorMode": "value",
            "graphMode": "area",
            "justifyMode": "center",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 5,
          "type": "piechart",
          "title": "Namespace CPU share",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
            "w": 12,
            "x": 0,
            "y": 6
          },
          "targets": [
            {
              "expr": "topk(8, sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace))",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "cores"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "list",
              "placement": "right"
            },
            "pieType": "pie",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 6,
          "type": "piechart",
          "title": "Namespace memory share",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 9,
            "w": 12,
            "x": 12,
            "y": 6
          },
          "targets": [
            {
              "expr": "topk(8, sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace))",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "bytes"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "list",
              "placement": "right"
            },
            "pieType": "donut",
            "reduceOptions": {
              "calcs": [
                "lastNotNull"
              ],
              "fields": "",
              "values": false
            }
          }
        },
        {
          "id": 7,
          "type": "timeseries",
          "title": "Node CPU usage (per node)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 0,
            "y": 15
          },
          "targets": [
            {
              "expr": "(1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100",
              "refId": "A",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "bottom"
            },
            "tooltip": {
              "mode": "multi"
            }
          }
        },
        {
          "id": 8,
          "type": "timeseries",
          "title": "Node memory usage (per node)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 12,
            "y": 15
          },
          "targets": [
            {
              "expr": "avg((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100) by (instance)",
              "refId": "A",
              "legendFormat": "{{instance}}"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "bottom"
            },
            "tooltip": {
              "mode": "multi"
            }
          }
        },
        {
          "id": 9,
          "type": "table",
          "title": "Key service availability",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 0,
            "y": 23
          },
          "targets": [
            {
              "expr": "max by (service) (up{service=~\"traefik|gitea|vault|victoria-metrics-single|grafana|alertmanager\"})",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "mappings": [
                {
                  "type": "value",
                  "options": {
                    "0": {
                      "index": 0,
                      "text": "Down"
                    },
                    "1": {
                      "index": 1,
                      "text": "Up"
                    }
                  }
                }
              ],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  {
                    "color": "red",
                    "value": null
                  },
                  {
                    "color": "green",
                    "value": 1
                  }
                ]
              }
            },
            "overrides": []
          },
          "options": {
            "showHeader": true
          }
        },
        {
          "id": 10,
          "type": "table",
          "title": "Failed pods (24h trend)",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 12,
            "y": 23
          },
          "targets": [
            {
              "expr": "topk(10, sum(max_over_time(kube_pod_status_phase{phase=\"Failed\"}[24h])) by (namespace))",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "none"
            },
            "overrides": []
          },
          "options": {
            "showHeader": true
          }
        },
        {
          "id": 11,
          "type": "timeseries",
          "title": "Cluster network throughput",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 0,
            "y": 31
          },
          "targets": [
            {
              "expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\"}[5m]))",
              "refId": "A",
              "legendFormat": "Receive"
            },
            {
              "expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\"}[5m]))",
              "refId": "B",
              "legendFormat": "Transmit"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "Bps"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "table",
              "placement": "bottom"
            },
            "tooltip": {
              "mode": "multi"
            }
          }
        },
        {
          "id": 12,
          "type": "timeseries",
          "title": "Storage usage across nodes",
          "datasource": {
            "type": "prometheus",
            "uid": "atlas-vm"
          },
          "gridPos": {
            "h": 8,
            "w": 12,
            "x": 12,
            "y": 31
          },
          "targets": [
            {
              "expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"}) * 100)",
              "refId": "A"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent"
            },
            "overrides": []
          },
          "options": {
            "legend": {
              "displayMode": "list",
              "placement": "bottom"
            }
          }
        },
        {
          "id": 13,
          "type": "text",
          "title": "About this dashboard",
          "gridPos": {
            "h": 6,
            "w": 24,
            "x": 0,
            "y": 39
          },
          "options": {
            "content": "### Atlas at a glance\n- Raspberry Pi + Jetson hybrid cluster with Flux-managed GitOps\n- Metrics powered by VictoriaMetrics, visualized by Grafana\n- Login for SRE mode with pod-level drilldowns, alert routes, and storage health",
            "mode": "markdown"
          }
        }
      ],
      "refresh": "30s",
      "schemaVersion": 39,
      "style": "dark",
      "tags": [
        "atlas",
        "public"
      ],
      "templating": {
        "list": []
      },
      "time": {
        "from": "now-12h",
        "to": "now"
      },
      "title": "Atlas Public Overview",
      "uid": "atlas-public",
      "version": 3
    }