titan-iac/services/monitoring/grafana-dashboard-overview.yaml

1506 lines
41 KiB
YAML
Raw Normal View History

# services/monitoring/grafana-dashboard-overview.yaml
# ConfigMap that ships the "Atlas Overview" Grafana dashboard as an embedded JSON document
# (stored under the atlas-overview.json data key).
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-overview
labels:
# NOTE(review): presumably the label a Grafana dashboard sidecar/provisioner selects on —
# confirm against the Grafana deployment. Kept quoted so the value stays a string.
grafana_dashboard: "1"
data:
atlas-overview.json: |
{
"uid": "atlas-overview",
"title": "Atlas Overview",
2025-11-17 16:27:38 -03:00
"folderUid": "atlas-overview",
"editable": false,
"annotations": {
2025-11-17 16:27:38 -03:00
"list": []
},
"panels": [
{
2025-11-15 21:03:11 -03:00
"id": 1,
"type": "stat",
2025-11-17 19:49:50 -03:00
"title": "Workers ready",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
2025-11-15 21:03:11 -03:00
"gridPos": {
2025-11-16 00:55:28 -03:00
"h": 5,
"w": 4,
2025-11-15 21:03:11 -03:00
"x": 0,
"y": 0
},
"targets": [
{
2025-11-17 19:49:50 -03:00
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
2025-11-15 21:03:11 -03:00
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
2025-11-17 19:49:50 -03:00
"color": "red",
"value": null
2025-11-15 21:03:11 -03:00
},
2025-11-17 19:49:50 -03:00
{
"color": "orange",
"value": 16
},
{
"color": "yellow",
"value": 17
},
2025-11-15 21:03:11 -03:00
{
"color": "green",
2025-11-17 19:49:50 -03:00
"value": 18
}
]
},
2025-11-17 16:27:38 -03:00
"unit": "none",
"custom": {
2025-11-17 19:49:50 -03:00
"displayMode": "auto",
"valueSuffix": "/18"
2025-11-17 16:27:38 -03:00
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
2025-11-15 21:03:11 -03:00
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
2025-11-16 00:55:28 -03:00
},
"textMode": "value"
2025-11-15 21:03:11 -03:00
}
},
{
"id": 2,
"type": "stat",
2025-11-17 19:49:50 -03:00
"title": "Control plane ready",
2025-11-15 21:03:11 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
2025-11-16 00:55:28 -03:00
"h": 5,
"w": 4,
"x": 4,
2025-11-15 21:03:11 -03:00
"y": 0
},
"targets": [
{
2025-11-17 19:49:50 -03:00
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"})",
"refId": "A"
}
],
2025-11-15 21:03:11 -03:00
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
2025-11-16 00:55:28 -03:00
"mode": "absolute",
2025-11-15 21:03:11 -03:00
"steps": [
{
2025-11-17 16:27:38 -03:00
"color": "red",
2025-11-15 21:03:11 -03:00
"value": null
},
{
2025-11-16 00:55:28 -03:00
"color": "green",
2025-11-17 19:49:50 -03:00
"value": 3
2025-11-16 00:55:28 -03:00
}
]
},
2025-11-17 16:27:38 -03:00
"unit": "none",
"custom": {
"displayMode": "auto",
2025-11-17 19:49:50 -03:00
"valueSuffix": "/3"
2025-11-17 16:27:38 -03:00
}
2025-11-16 00:55:28 -03:00
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
}
},
{
  "id": 3,
  "type": "stat",
  "title": "Control plane workloads",
  "description": "Pods running on titan-0a/0b/0c outside the kube-system, kube-public, kube-node-lease, longhorn-system and monitoring namespaces. Falls back to 0 when no such pod exists.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 5,
    "w": 4,
    "x": 8,
    "y": 0
  },
  "targets": [
    {
      "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring\"}) or on() vector(0)",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 1
          },
          {
            "color": "orange",
            "value": 2
          },
          {
            "color": "red",
            "value": 3
          }
        ]
      },
      "unit": "none",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "value"
  },
  "links": [
    {
      "title": "Open atlas-pods dashboard",
      "url": "/d/atlas-pods",
      "targetBlank": true
    }
  ]
},
{
2025-11-16 00:55:28 -03:00
"id": 4,
2025-11-15 21:03:11 -03:00
"type": "stat",
2025-11-17 19:49:50 -03:00
"title": "Problem pods",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
2025-11-15 21:03:11 -03:00
"gridPos": {
2025-11-16 00:55:28 -03:00
"h": 5,
"w": 4,
2025-11-15 21:03:11 -03:00
"x": 12,
"y": 0
},
"targets": [
{
2025-11-17 19:49:50 -03:00
"expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"}))",
2025-11-15 21:03:11 -03:00
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
2025-11-15 21:03:11 -03:00
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
2025-11-17 16:27:38 -03:00
"color": "green",
"value": null
},
{
2025-11-17 19:24:03 -03:00
"color": "yellow",
"value": 1
2025-11-17 19:24:03 -03:00
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
2025-11-17 16:27:38 -03:00
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
2025-11-15 21:03:11 -03:00
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
2025-11-16 00:55:28 -03:00
},
"textMode": "value"
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-pods dashboard",
"url": "/d/atlas-pods",
"targetBlank": true
}
]
2025-11-15 21:03:11 -03:00
},
{
  "id": 5,
  "type": "stat",
  "title": "Stuck terminating",
  "description": "Pods whose deletion timestamp is more than 600 seconds in the past. Falls back to 0 when no pod is being deleted.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 5,
    "w": 4,
    "x": 16,
    "y": 0
  },
  "targets": [
    {
      "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0))) or on() vector(0)",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 1
          },
          {
            "color": "orange",
            "value": 2
          },
          {
            "color": "red",
            "value": 3
          }
        ]
      },
      "unit": "none",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "value"
  },
  "links": [
    {
      "title": "Open atlas-pods dashboard",
      "url": "/d/atlas-pods",
      "targetBlank": true
    }
  ]
},
{
"id": 6,
"type": "stat",
2025-11-17 19:49:50 -03:00
"title": "Running pods",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 4,
"x": 20,
"y": 0
},
"targets": [
{
2025-11-17 19:49:50 -03:00
"expr": "sum(kube_pod_status_phase{phase=\"Running\"})",
2025-11-15 21:03:11 -03:00
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
2025-11-17 19:49:50 -03:00
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
2025-11-17 19:49:50 -03:00
"color": "green",
"value": 1
}
]
},
2025-11-17 16:27:38 -03:00
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
2025-11-17 19:49:50 -03:00
}
},
{
  "id": 7,
  "type": "stat",
  "title": "Hottest node: CPU",
  "description": "The single node with the highest non-idle CPU percentage over the last 5 minutes (topk 1). Yellow from 70 %, red from 85 %.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 5,
    "w": 6,
    "x": 0,
    "y": 5
  },
  "targets": [
    {
      "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
      "refId": "A",
      "legendFormat": "{{node}}",
      "instant": true
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 70
          },
          {
            "color": "red",
            "value": 85
          }
        ]
      },
      "unit": "percent",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "name_and_value"
  },
  "links": [
    {
      "title": "Open atlas-nodes dashboard",
      "url": "/d/atlas-nodes",
      "targetBlank": true
    }
  ]
},
{
  "id": 8,
  "type": "stat",
  "title": "Hottest node: RAM",
  "description": "The single node with the highest memory usage percentage, computed as (MemTotal - MemAvailable) / MemTotal (topk 1). Yellow from 70 %, red from 85 %.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 5,
    "w": 6,
    "x": 6,
    "y": 5
  },
  "targets": [
    {
      "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
      "refId": "A",
      "legendFormat": "{{node}}",
      "instant": true
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 70
          },
          {
            "color": "red",
            "value": 85
          }
        ]
      },
      "unit": "percent",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "name_and_value"
  },
  "links": [
    {
      "title": "Open atlas-nodes dashboard",
      "url": "/d/atlas-nodes",
      "targetBlank": true
    }
  ]
},
{
"id": 9,
2025-11-17 16:27:38 -03:00
"type": "stat",
2025-11-17 20:00:40 -03:00
"title": "Hottest node: NET (rx+tx)",
2025-11-17 16:27:38 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 6,
"x": 12,
"y": 5
},
"targets": [
{
2025-11-17 20:19:20 -03:00
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
2025-11-17 19:24:03 -03:00
"refId": "A",
"legendFormat": "{{node}}",
"instant": true
2025-11-17 16:27:38 -03:00
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
"color": "green",
"value": 1
}
]
},
2025-11-17 18:55:11 -03:00
"unit": "Bps",
2025-11-17 16:27:38 -03:00
"custom": {
"displayMode": "auto"
2025-11-17 19:56:57 -03:00
}
2025-11-17 16:27:38 -03:00
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
2025-11-17 19:49:50 -03:00
"textMode": "name_and_value"
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-nodes dashboard",
"url": "/d/atlas-nodes",
"targetBlank": true
}
]
},
{
"id": 10,
"type": "stat",
2025-11-17 20:00:40 -03:00
"title": "Hottest node: I/O (r+w)",
2025-11-17 16:27:38 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 5,
"w": 6,
"x": 18,
"y": 5
},
"targets": [
{
2025-11-17 20:19:20 -03:00
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
2025-11-17 19:24:03 -03:00
"refId": "A",
"legendFormat": "{{node}}",
"instant": true
2025-11-17 16:27:38 -03:00
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
"color": "green",
"value": 1
}
]
},
2025-11-17 18:55:11 -03:00
"unit": "Bps",
2025-11-17 16:27:38 -03:00
"custom": {
"displayMode": "auto"
2025-11-17 19:56:57 -03:00
}
2025-11-17 16:27:38 -03:00
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
2025-11-17 19:49:50 -03:00
"textMode": "name_and_value"
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-nodes dashboard",
"url": "/d/atlas-nodes",
"targetBlank": true
}
]
},
{
  "id": 11,
  "type": "piechart",
  "title": "Namespace CPU share",
  "description": "Top 10 namespaces by container CPU usage (5m rate), each expressed as a percentage of the total across all namespaces.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 9,
    "w": 12,
    "x": 0,
    "y": 10
  },
  "targets": [
    {
      "expr": "topk(10, 100 * sum by (namespace) (rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m])) / scalar(sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\"}[5m]))))",
      "refId": "A",
      "legendFormat": "{{namespace}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent"
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "displayMode": "list",
      "placement": "right"
    },
    "pieType": "pie",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    }
  }
},
{
  "id": 12,
  "type": "piechart",
  "title": "Namespace RAM share",
  "description": "Top 10 namespaces by container working-set memory, each expressed as a percentage of the total across all namespaces.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 9,
    "w": 12,
    "x": 12,
    "y": 10
  },
  "targets": [
    {
      "expr": "topk(10, 100 * sum by (namespace) (container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"}) / scalar(sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\"})))",
      "refId": "A",
      "legendFormat": "{{namespace}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent"
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "displayMode": "list",
      "placement": "right"
    },
    "pieType": "pie",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    }
  }
},
{
2025-11-17 16:27:38 -03:00
"id": 13,
2025-11-15 21:03:11 -03:00
"type": "timeseries",
"title": "Cluster node CPU",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
2025-11-17 16:27:38 -03:00
"y": 19
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
2025-11-15 21:03:11 -03:00
"refId": "A",
2025-11-16 00:55:28 -03:00
"legendFormat": "{{node}}"
2025-11-15 21:03:11 -03:00
}
],
"fieldConfig": {
"defaults": {
"unit": "percent"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"last"
]
},
2025-11-15 21:03:11 -03:00
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-nodes dashboard",
"url": "/d/atlas-nodes",
"targetBlank": true
}
]
2025-11-15 21:03:11 -03:00
},
{
2025-11-17 16:27:38 -03:00
"id": 14,
2025-11-15 21:03:11 -03:00
"type": "timeseries",
"title": "Cluster node RAM",
2025-11-15 21:03:11 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
2025-11-17 16:27:38 -03:00
"y": 19
2025-11-15 21:03:11 -03:00
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
2025-11-15 21:03:11 -03:00
"refId": "A",
2025-11-16 00:55:28 -03:00
"legendFormat": "{{node}}"
}
],
2025-11-15 21:03:11 -03:00
"fieldConfig": {
"defaults": {
"unit": "percent"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"last"
]
},
2025-11-15 21:03:11 -03:00
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-nodes dashboard",
"url": "/d/atlas-nodes",
"targetBlank": true
}
]
},
{
2025-11-17 16:27:38 -03:00
"id": 15,
"type": "timeseries",
"title": "Control plane CPU (incl. titan-db)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
2025-11-16 00:55:28 -03:00
"gridPos": {
2025-11-17 16:27:38 -03:00
"h": 7,
2025-11-16 00:55:28 -03:00
"w": 12,
"x": 0,
2025-11-17 16:27:38 -03:00
"y": 27
2025-11-16 00:55:28 -03:00
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
2025-11-16 00:55:28 -03:00
}
],
"fieldConfig": {
"defaults": {
2025-11-17 16:27:38 -03:00
"unit": "percent"
2025-11-16 00:55:28 -03:00
},
"overrides": []
},
"options": {
2025-11-17 16:27:38 -03:00
"legend": {
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
}
2025-11-16 00:55:28 -03:00
},
{
2025-11-17 16:27:38 -03:00
"id": 16,
"type": "timeseries",
"title": "Control plane RAM (incl. titan-db)",
2025-11-16 00:55:28 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
2025-11-17 16:27:38 -03:00
"h": 7,
2025-11-16 00:55:28 -03:00
"w": 12,
"x": 12,
2025-11-17 16:27:38 -03:00
"y": 27
2025-11-16 00:55:28 -03:00
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
2025-11-16 00:55:28 -03:00
}
],
"fieldConfig": {
"defaults": {
2025-11-17 16:27:38 -03:00
"unit": "percent"
2025-11-16 00:55:28 -03:00
},
"overrides": []
},
"options": {
2025-11-17 16:27:38 -03:00
"legend": {
"displayMode": "table",
"placement": "right"
},
2025-11-17 16:27:38 -03:00
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
}
2025-11-16 00:55:28 -03:00
},
{
2025-11-17 16:27:38 -03:00
"id": 17,
2025-11-16 00:55:28 -03:00
"type": "timeseries",
2025-11-17 16:27:38 -03:00
"title": "Cluster ingress throughput",
2025-11-16 00:55:28 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 12,
"x": 0,
2025-11-17 16:27:38 -03:00
"y": 34
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
"refId": "A"
2025-11-16 00:55:28 -03:00
}
],
"fieldConfig": {
"defaults": {
2025-11-17 18:55:11 -03:00
"unit": "Bps"
2025-11-16 00:55:28 -03:00
},
"overrides": []
},
"options": {
"legend": {
2025-11-17 16:27:38 -03:00
"displayMode": "list",
2025-11-16 00:55:28 -03:00
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-network dashboard",
"url": "/d/atlas-network",
"targetBlank": true
}
]
2025-11-16 00:55:28 -03:00
},
{
2025-11-17 16:27:38 -03:00
"id": 18,
"type": "timeseries",
2025-11-17 16:27:38 -03:00
"title": "Cluster egress throughput",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 7,
"w": 12,
"x": 12,
2025-11-17 16:27:38 -03:00
"y": 34
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "sum(rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])) or on() vector(0)",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
2025-11-17 18:55:11 -03:00
"unit": "Bps"
},
"overrides": []
},
"options": {
"legend": {
2025-11-17 16:27:38 -03:00
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-network dashboard",
"url": "/d/atlas-network",
"targetBlank": true
}
]
},
{
2025-11-17 16:27:38 -03:00
"id": 19,
"type": "timeseries",
"title": "Root filesystem usage",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
2025-11-17 16:27:38 -03:00
"y": 41
},
"targets": [
{
2025-11-17 16:27:38 -03:00
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent"
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "table",
"placement": "right",
"calcs": [
"last"
]
},
"tooltip": {
"mode": "multi"
}
},
2025-11-17 16:27:38 -03:00
"timeFrom": "30d",
"links": [
{
"title": "Open atlas-storage dashboard",
"url": "/d/atlas-storage",
"targetBlank": true
}
]
},
{
  "id": 20,
  "type": "bargauge",
  "title": "Nodes closest to full root disks",
  "description": "The 8 nodes with the highest root filesystem (mountpoint /) usage percentage, excluding tmpfs and overlay filesystems.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 8,
    "w": 12,
    "x": 12,
    "y": 41
  },
  "targets": [
    {
      "expr": "topk(8, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
      "refId": "A",
      "legendFormat": "{{node}}"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "unit": "percent",
      "min": 0,
      "max": 100,
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 50
          },
          {
            "color": "orange",
            "value": 70
          },
          {
            "color": "red",
            "value": 85
          }
        ]
      },
      "displayName": "${__field.labels.node}"
    },
    "overrides": []
  },
  "options": {
    "displayMode": "gradient",
    "orientation": "horizontal",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    }
  },
  "links": [
    {
      "title": "Open atlas-storage dashboard",
      "url": "/d/atlas-storage",
      "targetBlank": true
    }
  ]
},
{
  "id": 21,
  "type": "stat",
  "title": "Astreae usage",
  "description": "Used percentage of the filesystems mounted at /mnt/astreae, summed over all reporting series. Yellow from 70 %, red from 85 %.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 6,
    "w": 6,
    "x": 0,
    "y": 49
  },
  "targets": [
    {
      "expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) * 100)",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 70
          },
          {
            "color": "red",
            "value": 85
          }
        ]
      },
      "unit": "percent",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "value"
  },
  "links": [
    {
      "title": "Open atlas-storage dashboard",
      "url": "/d/atlas-storage",
      "targetBlank": true
    }
  ]
},
{
  "id": 22,
  "type": "stat",
  "title": "Asteria usage",
  "description": "Used percentage of the filesystems mounted at /mnt/asteria, summed over all reporting series. Yellow from 70 %, red from 85 %.",
  "datasource": {
    "type": "prometheus",
    "uid": "atlas-vm"
  },
  "gridPos": {
    "h": 6,
    "w": 6,
    "x": 6,
    "y": 49
  },
  "targets": [
    {
      "expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) * 100)",
      "refId": "A"
    }
  ],
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "palette-classic"
      },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          {
            "color": "green",
            "value": null
          },
          {
            "color": "yellow",
            "value": 70
          },
          {
            "color": "red",
            "value": 85
          }
        ]
      },
      "unit": "percent",
      "custom": {
        "displayMode": "auto"
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "area",
    "justifyMode": "center",
    "reduceOptions": {
      "calcs": [
        "lastNotNull"
      ],
      "fields": "",
      "values": false
    },
    "textMode": "value"
  },
  "links": [
    {
      "title": "Open atlas-storage dashboard",
      "url": "/d/atlas-storage",
      "targetBlank": true
    }
  ]
},
{
2025-11-17 16:27:38 -03:00
"id": 23,
2025-11-16 00:55:28 -03:00
"type": "stat",
"title": "Astreae free",
2025-11-16 00:55:28 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
2025-11-16 00:55:28 -03:00
"w": 6,
"x": 12,
2025-11-17 16:27:38 -03:00
"y": 49
2025-11-16 00:55:28 -03:00
},
"targets": [
{
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})",
2025-11-16 00:55:28 -03:00
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(115, 115, 115, 1)",
2025-11-15 21:03:11 -03:00
"value": null
},
{
"color": "green",
"value": 1
}
]
2025-11-16 00:55:28 -03:00
},
2025-11-17 18:55:11 -03:00
"unit": "decbytes",
2025-11-17 16:27:38 -03:00
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
2025-11-16 00:55:28 -03:00
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
2025-11-17 16:27:38 -03:00
},
"links": [
{
"title": "Open atlas-storage dashboard",
"url": "/d/atlas-storage",
"targetBlank": true
}
]
2025-11-15 21:03:11 -03:00
},
{
2025-11-17 16:27:38 -03:00
"id": 24,
2025-11-16 00:55:28 -03:00
"type": "stat",
"title": "Asteria free",
2025-11-15 21:03:11 -03:00
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 6,
2025-11-16 00:55:28 -03:00
"w": 6,
"x": 18,
2025-11-17 16:27:38 -03:00
"y": 49
2025-11-15 21:03:11 -03:00
},
"targets": [
{
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})",
2025-11-15 21:03:11 -03:00
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
2025-11-16 00:55:28 -03:00
"color": {
"mode": "palette-classic"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "rgba(115, 115, 115, 1)",
"value": null
},
{
"color": "green",
"value": 1
}
]
},
2025-11-17 18:55:11 -03:00
"unit": "decbytes",
2025-11-17 16:27:38 -03:00
"custom": {
"displayMode": "auto"
}
2025-11-15 21:03:11 -03:00
},
"overrides": []
},
"options": {
2025-11-16 00:55:28 -03:00
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
2025-11-15 21:03:11 -03:00
},
2025-11-17 16:27:38 -03:00
"links": [
2025-11-15 21:03:11 -03:00
{
2025-11-17 16:27:38 -03:00
"title": "Open atlas-storage dashboard",
"url": "/d/atlas-storage",
"targetBlank": true
2025-11-15 21:03:11 -03:00
}
]
2025-11-15 21:03:11 -03:00
},
{
"id": 25,
2025-11-15 21:03:11 -03:00
"type": "text",
"title": "About this dashboard",
"gridPos": {
2025-11-16 00:55:28 -03:00
"h": 5,
2025-11-15 21:03:11 -03:00
"w": 24,
"x": 0,
2025-11-17 16:27:38 -03:00
"y": 55
2025-11-15 21:03:11 -03:00
},
"datasource": null,
2025-11-15 21:03:11 -03:00
"options": {
2025-11-16 00:55:28 -03:00
"mode": "markdown",
2025-11-17 16:27:38 -03:00
"content": "### Atlas Overview\n- Anonymous users land here; follow the panel links for pod/node/storage/network drill-downs.\n- Control plane workload count flags any non-system pods that slipped onto the HA nodes.\n- Problem and stuck pods use kube-state-metrics so counts and detail tables match exactly."
2025-11-15 21:03:11 -03:00
}
}
],
"schemaVersion": 39,
"style": "dark",
"tags": [
"atlas",
"overview"
],
"templating": {
"list": []
},
"time": {
"from": "now-12h",
"to": "now"
2025-11-17 16:27:38 -03:00
},
"links": [
{
  "title": "Atlas Pods",
  "type": "link",
  "url": "/d/atlas-pods",
  "keepTime": false
},
{
  "title": "Atlas Nodes",
  "type": "link",
  "url": "/d/atlas-nodes",
  "keepTime": false
},
{
  "title": "Atlas Storage",
  "type": "link",
  "url": "/d/atlas-storage",
  "keepTime": false
},
{
  "title": "Atlas Network",
  "type": "link",
  "url": "/d/atlas-network",
  "keepTime": false
}
]
}