224 lines
5.5 KiB
YAML
224 lines
5.5 KiB
YAML
|
|
# services/monitoring/grafana-dashboard-sre.yaml
#
# ConfigMap carrying the "Atlas SRE Overview" Grafana dashboard as an
# embedded JSON document under a single data key.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-sre
  labels:
    # NOTE(review): presumably the label a Grafana dashboard sidecar selects
    # on — confirm against the sidecar configuration.
    grafana_dashboard: "1"
data:
  # Literal block scalar: every line of the dashboard JSON that follows must
  # be indented deeper than this key to remain part of the value.
  atlas-sre-overview.json: |
|
||
|
|
{
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "links": [],
  "annotations": {
    "list": [
      {
        "name": "Annotations & Alerts",
        "type": "dashboard",
        "builtIn": 1,
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "datasource": { "type": "datasource", "uid": "grafana" }
      }
    ]
  },
  "panels": [
|
||
|
|
{
  "id": 10,
  "type": "stat",
  "title": "Ready nodes",
  "description": "Percentage of Ready nodes.",
  "datasource": { "type": "prometheus", "uid": "atlas-vm" },
  "gridPos": { "h": 7, "w": 6, "x": 0, "y": 0 },
  "fieldConfig": {
    "defaults": {
      "color": {
        "mode": "thresholds"
      },
      "mappings": [],
      "min": 0,
      "max": 100,
      "unit": "percent",
      "thresholds": {
        "mode": "percentage",
        "steps": [
          { "color": "red", "value": null },
          { "color": "green", "value": 90 }
        ]
      }
    },
    "overrides": []
  },
  "options": {
    "colorMode": "value",
    "graphMode": "none",
    "justifyMode": "center",
    "orientation": "auto",
    "reduceOptions": {
      "calcs": ["lastNotNull"],
      "fields": "",
      "values": false
    }
  },
  "targets": [
    {
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "expr": "avg(kube_node_status_condition{condition=\"Ready\",status=\"true\"}) * 100",
      "refId": "A"
    }
  ]
},
|
||
|
|
{
  "id": 11,
  "type": "timeseries",
  "title": "Free root filesystem bytes",
  "description": "Bytes available on the filesystem mounted at /, summed by the node label.",
  "datasource": { "type": "prometheus", "uid": "atlas-vm" },
  "gridPos": { "h": 7, "w": 6, "x": 6, "y": 0 },
  "fieldConfig": {
    "defaults": {
      "unit": "bytes"
    },
    "overrides": []
  },
  "options": {
    "legend": {
      "displayMode": "table",
      "placement": "bottom"
    },
    "tooltip": {
      "mode": "multi"
    }
  },
  "targets": [
    {
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "expr": "sum by (node)(node_filesystem_avail_bytes{mountpoint=\"/\"})",
      "legendFormat": "{{node}}",
      "refId": "A"
    }
  ]
},
|
||
|
|
{
  "id": 12,
  "type": "timeseries",
  "title": "Crypto namespace CPU usage",
  "datasource": { "type": "prometheus", "uid": "atlas-vm" },
  "gridPos": { "h": 10, "w": 12, "x": 0, "y": 7 },
  "options": {
    "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
    "tooltip": { "mode": "single" }
  },
  "targets": [
    {
      "refId": "A",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "expr": "sum(rate(container_cpu_usage_seconds_total{namespace=\"crypto\",container!=\"\"}[5m])) by (pod)",
      "legendFormat": "{{pod}}"
    }
  ]
},
|
||
|
|
{
  "id": 13,
  "type": "bargauge",
  "title": "Namespaces with failed pods",
  "description": "Number of namespaces that currently have at least one pod in the Failed phase; shows 0 when there are none.",
  "datasource": { "type": "prometheus", "uid": "atlas-vm" },
  "gridPos": { "h": 9, "w": 12, "x": 0, "y": 17 },
  "options": {
    "displayMode": "gradient",
    "orientation": "horizontal",
    "reduceOptions": {
      "calcs": ["lastNotNull"],
      "fields": "",
      "values": false
    },
    "showUnfilled": false
  },
  "targets": [
    {
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "expr": "count(sum by (namespace) (kube_pod_status_phase{phase=\"Failed\"}) > 0) or vector(0)",
      "legendFormat": "",
      "refId": "A"
    }
  ]
}
|
||
|
|
],
|
||
|
|
"schemaVersion": 39,
|
||
|
|
"style": "dark",
|
||
|
|
"tags": [
|
||
|
|
"atlas",
|
||
|
|
"sre"
|
||
|
|
],
|
||
|
|
"templating": {
|
||
|
|
"list": []
|
||
|
|
},
|
||
|
|
"time": {
|
||
|
|
"from": "now-12h",
|
||
|
|
"to": "now"
|
||
|
|
},
|
||
|
|
"timepicker": {},
|
||
|
|
"title": "Atlas SRE Overview",
|
||
|
|
"uid": "atlas-sre",
|
||
|
|
"version": 1
|
||
|
|
}
|