1792 lines
53 KiB
YAML
1792 lines
53 KiB
YAML
# services/monitoring/grafana-dashboard-overview.yaml
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: grafana-dashboard-overview
|
|
labels:
|
|
grafana_dashboard: "1"
|
|
data:
|
|
atlas-overview.json: |
|
|
{
|
|
"uid": "atlas-overview",
|
|
"title": "Atlas Overview",
|
|
"folderUid": "overview",
|
|
"editable": false,
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"panels": [
|
|
{
|
|
"id": 2,
|
|
"type": "gauge",
|
|
"title": "Control Plane Ready",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 3,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 3
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"orientation": "auto",
|
|
"showThresholdMarkers": false,
|
|
"showThresholdLabels": false
|
|
}
|
|
},
|
|
{
|
|
"id": 3,
|
|
"type": "stat",
|
|
"title": "Control Plane Workloads",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 4,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-pods dashboard",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"type": "stat",
|
|
"title": "Stuck Terminating",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 7,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-pods dashboard",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 27,
|
|
"type": "stat",
|
|
"title": "Atlas Availability (30d)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 10,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[30d:5m])",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 0.99
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.999
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 0.9999
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 3
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
}
|
|
},
|
|
{
|
|
"id": 4,
|
|
"type": "stat",
|
|
"title": "Problem Pods",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 14,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"}))",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-pods dashboard",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 6,
|
|
"type": "stat",
|
|
"title": "CrashLoop / ImagePull",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 17,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"}))",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-pods dashboard",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 1,
|
|
"type": "gauge",
|
|
"title": "Workers Ready",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 20,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 18,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 16
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 17
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 18
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"orientation": "auto",
|
|
"showThresholdMarkers": false,
|
|
"showThresholdLabels": false
|
|
}
|
|
},
|
|
{
|
|
"id": 7,
|
|
"type": "stat",
|
|
"title": "Hottest node: CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 8,
|
|
"type": "stat",
|
|
"title": "Hottest node: RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 9,
|
|
"type": "stat",
|
|
"title": "Hottest node: NET (rx+tx)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo\"}[5m]) + rate(node_network_transmit_bytes_total{device!~\"lo\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 10,
|
|
"type": "stat",
|
|
"title": "Hottest node: I/O (r+w)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 3,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 23,
|
|
"type": "stat",
|
|
"title": "Astreae Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 10
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) * 100)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 24,
|
|
"type": "stat",
|
|
"title": "Asteria Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 10
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) * 100)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "percentage",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 25,
|
|
"type": "stat",
|
|
"title": "Astreae Free",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 10
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 26,
|
|
"type": "stat",
|
|
"title": "Asteria Free",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 10
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 11,
|
|
"type": "piechart",
|
|
"title": "Namespace CPU Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * ( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [
|
|
"percent"
|
|
],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 12,
|
|
"type": "piechart",
|
|
"title": "Namespace GPU Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 16
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * ( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( (sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [
|
|
"percent"
|
|
],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 13,
|
|
"type": "piechart",
|
|
"title": "Namespace RAM Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 16
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * ( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ) / clamp_min(sum( ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) ) and on(namespace) ( (topk(10, ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) ) + (sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\"}) by (namespace) / 1e9) + ((sum((kube_pod_container_resource_requests{namespace!=\"\",resource=\"nvidia.com/gpu\"} or kube_pod_container_resource_limits{namespace!=\"\",resource=\"nvidia.com/gpu\"})) by (namespace)) or on(namespace) (sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) by (namespace) * 0) * 100)) >= bool 0) ) ), 1)",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [
|
|
"percent"
|
|
],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 14,
|
|
"type": "timeseries",
|
|
"title": "Worker Node CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 12,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 32
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 15,
|
|
"type": "timeseries",
|
|
"title": "Worker Node RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 12,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 32
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-nodes dashboard",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 16,
|
|
"type": "timeseries",
|
|
"title": "Control plane CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 44
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 17,
|
|
"type": "timeseries",
|
|
"title": "Control plane RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 44
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 28,
|
|
"type": "piechart",
|
|
"title": "Pods by Node",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 54
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_pod_info{pod!=\"\"}) by (node)",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [
|
|
"percent"
|
|
],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 29,
|
|
"type": "bargauge",
|
|
"title": "Top Nodes by Pod Count",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 54
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "topk(12, sum(kube_pod_info{pod!=\"\"}) by (node))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"min": 0,
|
|
"max": null,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"transformations": [
|
|
{
|
|
"id": "sortBy",
|
|
"options": {
|
|
"fields": [
|
|
"Value"
|
|
],
|
|
"order": "desc"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 18,
|
|
"type": "timeseries",
|
|
"title": "Cluster Ingress Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 25
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Ingress (Traefik)"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-network dashboard",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 19,
|
|
"type": "timeseries",
|
|
"title": "Cluster Egress Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 25
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Egress (Traefik)"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-network dashboard",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 20,
|
|
"type": "timeseries",
|
|
"title": "Intra-Cluster Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 25
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Internal traffic"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-network dashboard",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 21,
|
|
"type": "timeseries",
|
|
"title": "Root Filesystem Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 16,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 64
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"timeFrom": "30d",
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 22,
|
|
"type": "bargauge",
|
|
"title": "Nodes Closest to Full Root Disks",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 16,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 64
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"min": 0,
|
|
"max": 100,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 85
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open atlas-storage dashboard",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"transformations": [
|
|
{
|
|
"id": "sortBy",
|
|
"options": {
|
|
"fields": [
|
|
"Value"
|
|
],
|
|
"order": "desc"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"schemaVersion": 39,
|
|
"style": "dark",
|
|
"tags": [
|
|
"atlas",
|
|
"overview"
|
|
],
|
|
"templating": {
|
|
"list": []
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"refresh": "1m",
|
|
"links": []
|
|
}
|