3417 lines
104 KiB
YAML
3417 lines
104 KiB
YAML
# services/monitoring/grafana-dashboard-overview.yaml
|
|
apiVersion: v1
|
|
kind: ConfigMap
|
|
metadata:
|
|
name: grafana-dashboard-overview
|
|
labels:
|
|
grafana_dashboard: "1"
|
|
data:
|
|
atlas-overview.json: |
|
|
{
|
|
"uid": "atlas-overview",
|
|
"title": "Atlas Overview",
|
|
"folderUid": "overview",
|
|
"editable": false,
|
|
"annotations": {
|
|
"list": []
|
|
},
|
|
"panels": [
|
|
{
|
|
"id": 2,
|
|
"type": "gauge",
|
|
"title": "Control Plane Ready",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 3,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 3
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"orientation": "auto",
|
|
"showThresholdMarkers": false,
|
|
"showThresholdLabels": false
|
|
}
|
|
},
|
|
{
|
|
"id": 3,
|
|
"type": "stat",
|
|
"title": "Control Plane Workloads",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 4,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Pods",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"type": "stat",
|
|
"title": "Stuck Terminating",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 7,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0))) or on() vector(0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Pods",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 27,
|
|
"type": "stat",
|
|
"title": "Atlas Availability",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 10,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 0.99
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 0.999
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 0.9999
|
|
},
|
|
{
|
|
"color": "blue",
|
|
"value": 0.99999
|
|
}
|
|
]
|
|
},
|
|
"unit": "percentunit",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 4
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
}
|
|
},
|
|
{
|
|
"id": 4,
|
|
"type": "stat",
|
|
"title": "Problem Pods",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 14,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})) or on() vector(0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Pods",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 6,
|
|
"type": "stat",
|
|
"title": "CrashLoop / ImagePull",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 3,
|
|
"x": 17,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})) or on() vector(0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 1
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 2
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 3
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Pods",
|
|
"url": "/d/atlas-pods",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 1,
|
|
"type": "gauge",
|
|
"title": "Workers Ready",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 4,
|
|
"x": 20,
|
|
"y": 0
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"min": 0,
|
|
"max": 20,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 18
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 19
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 20
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"orientation": "auto",
|
|
"showThresholdMarkers": false,
|
|
"showThresholdLabels": false
|
|
}
|
|
},
|
|
{
|
|
"id": 7,
|
|
"type": "stat",
|
|
"title": "Hottest node: CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 0,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 91.5
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 8,
|
|
"type": "stat",
|
|
"title": "Hottest node: RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 3,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 91.5
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 9,
|
|
"type": "stat",
|
|
"title": "Hottest node: NET (rx+tx)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 6,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo\"}[5m]) + rate(node_network_transmit_bytes_total{device!~\"lo\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 10,
|
|
"type": "stat",
|
|
"title": "Hottest node: I/O (r+w)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 9,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "Bps",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 23,
|
|
"type": "stat",
|
|
"title": "Astreae Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 12,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) * 100)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 91.5
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 24,
|
|
"type": "stat",
|
|
"title": "Asteria Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 15,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) * 100)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 91.5
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 25,
|
|
"type": "stat",
|
|
"title": "Astreae Free",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 18,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 26,
|
|
"type": "stat",
|
|
"title": "Asteria Free",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 3,
|
|
"x": 21,
|
|
"y": 5
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "decbytes",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 40,
|
|
"type": "stat",
|
|
"title": "UPS Current Load",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100) or on() vector(0)",
|
|
"legendFormat": "Pyrphoros Draw (W)",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
|
|
"legendFormat": "Pyrphoros Discharge",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Pyrphoros\"}) or on() vector(0)",
|
|
"legendFormat": "Pyrphoros Status",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "max((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100) or on() vector(0)",
|
|
"legendFormat": "Statera Draw (W)",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "E",
|
|
"expr": "max(ananke_ups_runtime_seconds{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
|
|
"legendFormat": "Statera Discharge",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "F",
|
|
"expr": "max(ananke_ups_on_battery{job=\"ananke-power\",source=\"Statera\"}) or on() vector(0)",
|
|
"legendFormat": "Statera Status",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 1
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Pyrphoros Draw (W)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "watt"
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-db"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Statera Draw (W)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "watt"
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-24"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Pyrphoros Discharge"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "s"
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-db"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Statera Discharge"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "s"
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-24"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Pyrphoros Status"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "mappings",
|
|
"value": [
|
|
{
|
|
"type": "value",
|
|
"options": {
|
|
"0": {
|
|
"text": "\u26a1 Charging"
|
|
},
|
|
"1": {
|
|
"text": "\ud83d\udd0b Discharging"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-db"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Statera Status"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "mappings",
|
|
"value": [
|
|
{
|
|
"type": "value",
|
|
"options": {
|
|
"0": {
|
|
"text": "\u26a1 Charging"
|
|
},
|
|
"1": {
|
|
"text": "\ud83d\udd0b Discharging"
|
|
}
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": "description",
|
|
"value": "Attached node: titan-24"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value",
|
|
"orientation": "horizontal",
|
|
"wideLayout": true
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"description": "Per-UPS live snapshot: current draw, discharge, and charging/discharging status."
|
|
},
|
|
{
|
|
"id": 41,
|
|
"type": "timeseries",
|
|
"title": "UPS History (Power Draw)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 4,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Pyrphoros\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Pyrphoros\"}) / 100)",
|
|
"legendFormat": "Pyrphoros"
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
|
|
"legendFormat": "Statera"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "watt"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 42,
|
|
"type": "stat",
|
|
"title": "Current Climate",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 8,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "max(typhon_temperature_celsius) or on() vector(0)",
|
|
"legendFormat": "Tent Temp (\u00b0C)",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "max(typhon_vpd_kpa) or on() vector(0)",
|
|
"legendFormat": "Tent VPD (kPa)",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "max(typhon_relative_humidity_percent) or on() vector(0)",
|
|
"legendFormat": "Tent RH (%)",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)",
|
|
"legendFormat": "Dew Point (\u00b0C)",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 2
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Tent Temp (\u00b0C)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "celsius"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Tent VPD (kPa)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "suffix:kPa"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Tent RH (%)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "percent"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Dew Point (\u00b0C)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "celsius"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value",
|
|
"orientation": "horizontal",
|
|
"wideLayout": true
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"description": "Current tent temperature, humidity, VPD, and dew point."
|
|
},
|
|
{
|
|
"id": 43,
|
|
"type": "timeseries",
|
|
"title": "Climate History",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 12,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "typhon_temperature_celsius",
|
|
"legendFormat": "Temperature (\u00b0C)"
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "typhon_relative_humidity_percent",
|
|
"legendFormat": "Humidity (%)"
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "typhon_vpd_kpa",
|
|
"legendFormat": "VPD (kPa)"
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))",
|
|
"legendFormat": "Dew Point (\u00b0C)"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "celsius"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Humidity (%)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "percent"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "VPD (kPa)"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "none"
|
|
},
|
|
{
|
|
"id": "custom.axisPlacement",
|
|
"value": "right"
|
|
},
|
|
{
|
|
"id": "custom.axisLabel",
|
|
"value": "kPa"
|
|
},
|
|
{
|
|
"id": "decimals",
|
|
"value": 2
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis)."
|
|
},
|
|
{
|
|
"id": 140,
|
|
"type": "stat",
|
|
"title": "Fan Activity",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 16,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))",
|
|
"legendFormat": "Inside Outlet",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))",
|
|
"legendFormat": "Inside Inlet",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))",
|
|
"legendFormat": "Outside Inlet",
|
|
"instant": true
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))",
|
|
"legendFormat": "Interior Fans",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 7
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 9
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 0
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value",
|
|
"orientation": "horizontal",
|
|
"wideLayout": true
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 141,
|
|
"type": "timeseries",
|
|
"title": "Fan History (0-10)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 6,
|
|
"w": 4,
|
|
"x": 20,
|
|
"y": 12
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "typhon_fan_speed_level{fan_group=\"outlet\"}",
|
|
"legendFormat": "Inside Outlet"
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}",
|
|
"legendFormat": "Inside Inlet"
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}",
|
|
"legendFormat": "Outside Inlet"
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "typhon_fan_speed_level{fan_group=\"interior\"}",
|
|
"legendFormat": "Interior Fans"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"max": 10
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Power",
|
|
"url": "/d/atlas-power",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 44,
|
|
"type": "bargauge",
|
|
"title": "One-off Job Pods (age hours)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 0,
|
|
"y": 7
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sort_desc(((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"}))",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}/{{pod}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "h",
|
|
"min": 0,
|
|
"max": null,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 6
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 24
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 48
|
|
}
|
|
]
|
|
},
|
|
"decimals": 2
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Testing",
|
|
"url": "/d/atlas-jobs",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"transformations": [
|
|
{
|
|
"id": "sortBy",
|
|
"options": {
|
|
"fields": [
|
|
"Value"
|
|
],
|
|
"order": "desc"
|
|
}
|
|
},
|
|
{
|
|
"id": "limit",
|
|
"options": {
|
|
"limit": 12
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 45,
|
|
"type": "timeseries",
|
|
"title": "Ariadne Attempts / Failures",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 6,
|
|
"y": 7
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(increase(ariadne_task_runs_total[$__interval]))",
|
|
"refId": "A",
|
|
"legendFormat": "Attempts"
|
|
},
|
|
{
|
|
"expr": "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))",
|
|
"refId": "B",
|
|
"legendFormat": "Failures"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Attempts"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"mode": "fixed",
|
|
"fixedColor": "green"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Failures"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "color",
|
|
"value": {
|
|
"mode": "fixed",
|
|
"fixedColor": "red"
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Testing",
|
|
"url": "/d/atlas-jobs",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 46,
|
|
"type": "timeseries",
|
|
"title": "Platform Test Success Rate",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 12,
|
|
"y": 7
|
|
},
|
|
"targets": [
|
|
{
|
|
"refId": "A",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "ariadne"
|
|
},
|
|
{
|
|
"refId": "B",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "metis"
|
|
},
|
|
{
|
|
"refId": "C",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "ananke"
|
|
},
|
|
{
|
|
"refId": "D",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "atlasbot"
|
|
},
|
|
{
|
|
"refId": "E",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "lesavka"
|
|
},
|
|
{
|
|
"refId": "F",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "pegasus"
|
|
},
|
|
{
|
|
"refId": "G",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "soteria"
|
|
},
|
|
{
|
|
"refId": "H",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "titan-iac"
|
|
},
|
|
{
|
|
"refId": "I",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "bstein-home"
|
|
},
|
|
{
|
|
"refId": "J",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "arcanagon"
|
|
},
|
|
{
|
|
"refId": "K",
|
|
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)",
|
|
"legendFormat": "data-prepper"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"min": 0,
|
|
"max": 100,
|
|
"custom": {
|
|
"drawStyle": "line",
|
|
"lineInterpolation": "linear",
|
|
"lineWidth": 2,
|
|
"fillOpacity": 10,
|
|
"showPoints": "always",
|
|
"pointSize": 4,
|
|
"spanNulls": true
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"lastNotNull"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Testing",
|
|
"url": "/d/atlas-jobs",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"timeFrom": "7d",
|
|
"description": "Per-run interval pass points (0-100) for each software suite over the last 7 days. Points are connected to show trend; missing-run intervals are ignored."
|
|
},
|
|
{
|
|
"id": 47,
|
|
"type": "bargauge",
|
|
"title": "PVC Backup Health / Age",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 5,
|
|
"w": 6,
|
|
"x": 18,
|
|
"y": 7
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}/{{pvc}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "h",
|
|
"min": 0,
|
|
"max": null,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 20
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 40
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 50
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"transformations": [
|
|
{
|
|
"id": "sortBy",
|
|
"options": {
|
|
"fields": [
|
|
"Value"
|
|
],
|
|
"order": "desc"
|
|
}
|
|
}
|
|
],
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
],
|
|
"description": "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible."
|
|
},
|
|
{
|
|
"id": 30,
|
|
"type": "stat",
|
|
"title": "Mail Sent (1d)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "max(postmark_outbound_sent{window=\"1d\"})",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Mail",
|
|
"url": "/d/atlas-mail",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 31,
|
|
"type": "stat",
|
|
"title": "Mail Bounces (1d)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 8,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})",
|
|
"refId": "A",
|
|
"legendFormat": "Rate"
|
|
},
|
|
{
|
|
"expr": "max(postmark_outbound_bounced{window=\"1d\"})",
|
|
"refId": "B",
|
|
"legendFormat": "Count"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 5
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 8
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 10
|
|
}
|
|
]
|
|
},
|
|
"unit": "none"
|
|
},
|
|
"overrides": [
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Rate"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "percent"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"matcher": {
|
|
"id": "byName",
|
|
"options": "Count"
|
|
},
|
|
"properties": [
|
|
{
|
|
"id": "unit",
|
|
"value": "none"
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Mail",
|
|
"url": "/d/atlas-mail",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 32,
|
|
"type": "stat",
|
|
"title": "Mail Success Rate (1d)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 4,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "red",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 90
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 95
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 98
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 1
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Mail",
|
|
"url": "/d/atlas-mail",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 33,
|
|
"type": "stat",
|
|
"title": "Mail Limit Used (30d)",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 12,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "max(postmark_sending_limit_used_percent)",
|
|
"refId": "A"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 70
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 85
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 95
|
|
}
|
|
]
|
|
},
|
|
"unit": "percent",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 1
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "value"
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Mail",
|
|
"url": "/d/atlas-mail",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 34,
|
|
"type": "stat",
|
|
"title": "Postgres Connections Used",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 16,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "label_replace(sum(pg_stat_activity_count), \"conn\", \"used\", \"__name__\", \".*\") or label_replace(max(pg_settings_max_connections), \"conn\", \"max\", \"__name__\", \".*\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{conn}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 0
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
}
|
|
},
|
|
{
|
|
"id": 35,
|
|
"type": "stat",
|
|
"title": "Postgres Hottest Connections",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 2,
|
|
"w": 4,
|
|
"x": 20,
|
|
"y": 18
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "topk(1, sum by (datname) (pg_stat_activity_count))",
|
|
"refId": "A",
|
|
"legendFormat": "{{datname}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"color": {
|
|
"mode": "thresholds"
|
|
},
|
|
"mappings": [],
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "rgba(115, 115, 115, 1)",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "green",
|
|
"value": 1
|
|
}
|
|
]
|
|
},
|
|
"unit": "none",
|
|
"custom": {
|
|
"displayMode": "auto"
|
|
},
|
|
"decimals": 0
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"colorMode": "value",
|
|
"graphMode": "area",
|
|
"justifyMode": "center",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
},
|
|
"textMode": "name_and_value"
|
|
}
|
|
},
|
|
{
|
|
"id": 11,
|
|
"type": "piechart",
|
|
"title": "Namespace CPU Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 23
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Workload namespaces only",
|
|
"url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "All namespaces",
|
|
"url": "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "Infrastructure namespaces only",
|
|
"url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
}
|
|
],
|
|
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
|
},
|
|
{
|
|
"id": 12,
|
|
"type": "piechart",
|
|
"title": "Namespace GPU Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 23
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(100 * (sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) / clamp_min((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") * scalar((sum(sum by (namespace) ((sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))) / clamp_min(sum by (node) (sum by (namespace,node) (kube_pod_container_resource_requests{resource=~\"nvidia.com/gpu.*\",$namespace_scope_gpu} * on(namespace,pod) group_left(node) kube_pod_info * on(node) group_left() (kube_node_labels{label_accelerator=~\".+\"} or kube_node_labels{label_jetson=\"true\"}))), 1) * on(node) group_left() (avg by (node) (label_replace(label_replace(DCGM_FI_DEV_GPU_UTIL, \"pod\", \"$1\", \"Hostname\", \"(.*)\"), \"namespace\", \"monitoring\", \"\", \"\") * on(namespace,pod) group_left(node) kube_pod_info{namespace=\"monitoring\"}) or max by (node) (jetson_gr3d_freq_percent{node!=\"\"})))) or on() vector(0)) == bool 0))",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Workload namespaces only",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "All namespaces",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "Infrastructure namespaces only",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
|
"targetBlank": false
|
|
}
|
|
],
|
|
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
|
},
|
|
{
|
|
"id": 13,
|
|
"type": "piechart",
|
|
"title": "Namespace RAM Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 9,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 23
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Workload namespaces only",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "All namespaces",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22",
|
|
"targetBlank": false
|
|
},
|
|
{
|
|
"title": "Infrastructure namespaces only",
|
|
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22",
|
|
"targetBlank": false
|
|
}
|
|
],
|
|
"description": "Shares are normalized within the selected filter. Switching scope changes the denominator."
|
|
},
|
|
{
|
|
"id": 14,
|
|
"type": "timeseries",
|
|
"title": "Worker Node CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 12,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 39
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 15,
|
|
"type": "timeseries",
|
|
"title": "Worker Node RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 12,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 39
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Nodes",
|
|
"url": "/d/atlas-nodes",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 16,
|
|
"type": "timeseries",
|
|
"title": "Control plane CPU",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 51
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 17,
|
|
"type": "timeseries",
|
|
"title": "Control plane RAM",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 51
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 28,
|
|
"type": "piechart",
|
|
"title": "Node Pod Share",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 61
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "(sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node) / clamp_min(sum(kube_pod_info{pod!=\"\" , node!=\"\"}), 1)) * 100",
|
|
"refId": "A",
|
|
"legendFormat": "{{namespace}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent",
|
|
"color": {
|
|
"mode": "palette-classic"
|
|
}
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "right"
|
|
},
|
|
"pieType": "pie",
|
|
"displayLabels": [],
|
|
"tooltip": {
|
|
"mode": "single"
|
|
},
|
|
"colorScheme": "interpolateSpectral",
|
|
"colorBy": "value",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
}
|
|
},
|
|
{
|
|
"id": 29,
|
|
"type": "bargauge",
|
|
"title": "Top Nodes by Pod Count",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 61
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sort_desc(topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}",
|
|
"instant": true
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "none",
|
|
"min": 0,
|
|
"max": null,
|
|
"thresholds": {
|
|
"mode": "absolute",
|
|
"steps": [
|
|
{
|
|
"color": "green",
|
|
"value": null
|
|
},
|
|
{
|
|
"color": "yellow",
|
|
"value": 50
|
|
},
|
|
{
|
|
"color": "orange",
|
|
"value": 75
|
|
},
|
|
{
|
|
"color": "red",
|
|
"value": 100
|
|
}
|
|
]
|
|
},
|
|
"decimals": 0
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"displayMode": "gradient",
|
|
"orientation": "horizontal",
|
|
"reduceOptions": {
|
|
"calcs": [
|
|
"lastNotNull"
|
|
],
|
|
"fields": "",
|
|
"values": false
|
|
}
|
|
},
|
|
"transformations": [
|
|
{
|
|
"id": "sortBy",
|
|
"options": {
|
|
"fields": [
|
|
"Value"
|
|
],
|
|
"order": "desc"
|
|
}
|
|
},
|
|
{
|
|
"id": "limit",
|
|
"options": {
|
|
"limit": 12
|
|
}
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 18,
|
|
"type": "timeseries",
|
|
"title": "Cluster Ingress Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 0,
|
|
"y": 32
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Ingress (Traefik)"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Network",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 19,
|
|
"type": "timeseries",
|
|
"title": "Cluster Egress Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 8,
|
|
"y": 32
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Egress (Traefik)"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Network",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 20,
|
|
"type": "timeseries",
|
|
"title": "Intra-Cluster Throughput",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 7,
|
|
"w": 8,
|
|
"x": 16,
|
|
"y": 32
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m])) or on() vector(0)",
|
|
"refId": "A",
|
|
"legendFormat": "Internal traffic"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "Bps"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "list",
|
|
"placement": "bottom"
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Network",
|
|
"url": "/d/atlas-network",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 21,
|
|
"type": "timeseries",
|
|
"title": "Root Filesystem Usage",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 16,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 71
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"timeFrom": "30d",
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 22,
|
|
"type": "timeseries",
|
|
"title": "Nodes Closest to Full Astraios Disks",
|
|
"datasource": {
|
|
"type": "prometheus",
|
|
"uid": "atlas-vm"
|
|
},
|
|
"gridPos": {
|
|
"h": 16,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 71
|
|
},
|
|
"targets": [
|
|
{
|
|
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
|
|
"refId": "A",
|
|
"legendFormat": "{{node}}"
|
|
}
|
|
],
|
|
"fieldConfig": {
|
|
"defaults": {
|
|
"unit": "percent"
|
|
},
|
|
"overrides": []
|
|
},
|
|
"options": {
|
|
"legend": {
|
|
"displayMode": "table",
|
|
"placement": "right",
|
|
"calcs": [
|
|
"last"
|
|
]
|
|
},
|
|
"tooltip": {
|
|
"mode": "multi"
|
|
}
|
|
},
|
|
"timeFrom": "1w",
|
|
"links": [
|
|
{
|
|
"title": "Open Atlas Storage",
|
|
"url": "/d/atlas-storage",
|
|
"targetBlank": true
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"schemaVersion": 39,
|
|
"style": "dark",
|
|
"tags": [
|
|
"atlas",
|
|
"overview"
|
|
],
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "namespace_scope_cpu",
|
|
"label": "CPU namespace filter",
|
|
"type": "custom",
|
|
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"current": {
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
"options": [
|
|
{
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
{
|
|
"text": "all namespaces",
|
|
"value": "namespace=~\".*\"",
|
|
"selected": false
|
|
},
|
|
{
|
|
"text": "infrastructure namespaces only",
|
|
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": false
|
|
}
|
|
],
|
|
"hide": 2,
|
|
"multi": false,
|
|
"includeAll": false,
|
|
"refresh": 1,
|
|
"sort": 0,
|
|
"skipUrlSync": false
|
|
},
|
|
{
|
|
"name": "namespace_scope_gpu",
|
|
"label": "GPU namespace filter",
|
|
"type": "custom",
|
|
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"current": {
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
"options": [
|
|
{
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
{
|
|
"text": "all namespaces",
|
|
"value": "namespace=~\".*\"",
|
|
"selected": false
|
|
},
|
|
{
|
|
"text": "infrastructure namespaces only",
|
|
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": false
|
|
}
|
|
],
|
|
"hide": 2,
|
|
"multi": false,
|
|
"includeAll": false,
|
|
"refresh": 1,
|
|
"sort": 0,
|
|
"skipUrlSync": false
|
|
},
|
|
{
|
|
"name": "namespace_scope_ram",
|
|
"label": "RAM namespace filter",
|
|
"type": "custom",
|
|
"query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"current": {
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
"options": [
|
|
{
|
|
"text": "workload namespaces only",
|
|
"value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": true
|
|
},
|
|
{
|
|
"text": "all namespaces",
|
|
"value": "namespace=~\".*\"",
|
|
"selected": false
|
|
},
|
|
{
|
|
"text": "infrastructure namespaces only",
|
|
"value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"",
|
|
"selected": false
|
|
}
|
|
],
|
|
"hide": 2,
|
|
"multi": false,
|
|
"includeAll": false,
|
|
"refresh": 1,
|
|
"sort": 0,
|
|
"skipUrlSync": false
|
|
}
|
|
]
|
|
},
|
|
"time": {
|
|
"from": "now-1h",
|
|
"to": "now"
|
|
},
|
|
"refresh": "1m",
|
|
"links": [
|
|
{
|
|
"title": "Atlas Testing (Internal)",
|
|
"url": "/d/atlas-jobs",
|
|
"targetBlank": false
|
|
}
|
|
]
|
|
}
|