2025-11-17 14:22:46 -03:00
|
|
|
{
|
|
|
|
|
"uid": "atlas-pods",
|
|
|
|
|
"title": "Atlas Pods",
|
2025-11-17 16:27:38 -03:00
|
|
|
"folderUid": "atlas-internal",
|
2025-11-17 14:22:46 -03:00
|
|
|
"editable": true,
|
|
|
|
|
"panels": [
|
|
|
|
|
{
|
|
|
|
|
"id": 1,
|
2025-11-17 16:27:38 -03:00
|
|
|
"type": "stat",
|
2025-12-02 14:41:39 -03:00
|
|
|
"title": "Problem Pods",
|
2025-11-17 16:27:38 -03:00
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 4,
|
|
|
|
|
"w": 6,
|
|
|
|
|
"x": 0,
|
|
|
|
|
"y": 0
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
|
|
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"}))",
|
|
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"color": {
|
|
|
|
|
"mode": "palette-classic"
|
|
|
|
|
},
|
|
|
|
|
"mappings": [],
|
|
|
|
|
"thresholds": {
|
|
|
|
|
"mode": "absolute",
|
|
|
|
|
"steps": [
|
|
|
|
|
{
|
|
|
|
|
"color": "green",
|
|
|
|
|
"value": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "red",
|
|
|
|
|
"value": 1
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"custom": {
|
|
|
|
|
"displayMode": "auto"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"colorMode": "value",
|
|
|
|
|
"graphMode": "area",
|
|
|
|
|
"justifyMode": "center",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
},
|
|
|
|
|
"textMode": "value"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 2,
|
|
|
|
|
"type": "stat",
|
|
|
|
|
"title": "CrashLoop / ImagePull",
|
|
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 4,
|
|
|
|
|
"w": 6,
|
|
|
|
|
"x": 6,
|
|
|
|
|
"y": 0
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
|
|
|
|
"expr": "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"}))",
|
|
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"color": {
|
|
|
|
|
"mode": "palette-classic"
|
|
|
|
|
},
|
|
|
|
|
"mappings": [],
|
|
|
|
|
"thresholds": {
|
|
|
|
|
"mode": "absolute",
|
|
|
|
|
"steps": [
|
|
|
|
|
{
|
|
|
|
|
"color": "green",
|
|
|
|
|
"value": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "red",
|
|
|
|
|
"value": 1
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"custom": {
|
|
|
|
|
"displayMode": "auto"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"colorMode": "value",
|
|
|
|
|
"graphMode": "area",
|
|
|
|
|
"justifyMode": "center",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
},
|
|
|
|
|
"textMode": "value"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 3,
|
|
|
|
|
"type": "stat",
|
2025-12-02 14:41:39 -03:00
|
|
|
"title": "Stuck Terminating (>10m)",
|
2025-11-17 16:27:38 -03:00
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 4,
|
|
|
|
|
"w": 6,
|
|
|
|
|
"x": 12,
|
|
|
|
|
"y": 0
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
2025-11-17 18:55:11 -03:00
|
|
|
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
|
2025-11-17 16:27:38 -03:00
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"color": {
|
|
|
|
|
"mode": "palette-classic"
|
|
|
|
|
},
|
|
|
|
|
"mappings": [],
|
|
|
|
|
"thresholds": {
|
|
|
|
|
"mode": "absolute",
|
|
|
|
|
"steps": [
|
|
|
|
|
{
|
|
|
|
|
"color": "green",
|
|
|
|
|
"value": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "red",
|
|
|
|
|
"value": 1
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"custom": {
|
|
|
|
|
"displayMode": "auto"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"colorMode": "value",
|
|
|
|
|
"graphMode": "area",
|
|
|
|
|
"justifyMode": "center",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
},
|
|
|
|
|
"textMode": "value"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 4,
|
|
|
|
|
"type": "stat",
|
2025-12-02 14:41:39 -03:00
|
|
|
"title": "Control Plane Workloads",
|
2025-11-17 16:27:38 -03:00
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 4,
|
|
|
|
|
"w": 6,
|
|
|
|
|
"x": 18,
|
|
|
|
|
"y": 0
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
2025-11-18 16:18:52 -03:00
|
|
|
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
2025-11-17 16:27:38 -03:00
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"color": {
|
|
|
|
|
"mode": "palette-classic"
|
|
|
|
|
},
|
|
|
|
|
"mappings": [],
|
|
|
|
|
"thresholds": {
|
|
|
|
|
"mode": "absolute",
|
|
|
|
|
"steps": [
|
|
|
|
|
{
|
|
|
|
|
"color": "green",
|
|
|
|
|
"value": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "red",
|
|
|
|
|
"value": 1
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"custom": {
|
|
|
|
|
"displayMode": "auto"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"colorMode": "value",
|
|
|
|
|
"graphMode": "area",
|
|
|
|
|
"justifyMode": "center",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
},
|
|
|
|
|
"textMode": "value"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 5,
|
2025-11-17 14:22:46 -03:00
|
|
|
"type": "table",
|
2025-12-02 14:41:39 -03:00
|
|
|
"title": "Pods Not Running",
|
2025-11-17 14:22:46 -03:00
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 10,
|
|
|
|
|
"w": 24,
|
|
|
|
|
"x": 0,
|
2025-11-17 16:27:38 -03:00
|
|
|
"y": 4
|
2025-11-17 14:22:46 -03:00
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
2025-11-17 16:27:38 -03:00
|
|
|
"expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase!~\"Running|Succeeded\"} > 0)",
|
2025-11-17 14:22:46 -03:00
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"unit": "s"
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"showHeader": true
|
|
|
|
|
},
|
|
|
|
|
"transformations": [
|
|
|
|
|
{
|
|
|
|
|
"id": "labelsToFields",
|
|
|
|
|
"options": {}
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
2025-11-17 16:27:38 -03:00
|
|
|
"id": 6,
|
2025-11-17 14:22:46 -03:00
|
|
|
"type": "table",
|
|
|
|
|
"title": "CrashLoop / ImagePull",
|
|
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 10,
|
|
|
|
|
"w": 24,
|
|
|
|
|
"x": 0,
|
2025-11-17 16:27:38 -03:00
|
|
|
"y": 14
|
2025-11-17 14:22:46 -03:00
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
2025-11-17 16:27:38 -03:00
|
|
|
"expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod,container) group_left(reason) max by (namespace,pod,container,reason) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})",
|
2025-11-17 14:22:46 -03:00
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"unit": "s"
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"showHeader": true
|
|
|
|
|
},
|
|
|
|
|
"transformations": [
|
|
|
|
|
{
|
|
|
|
|
"id": "labelsToFields",
|
|
|
|
|
"options": {}
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
2025-11-17 16:27:38 -03:00
|
|
|
"id": 7,
|
2025-11-17 14:22:46 -03:00
|
|
|
"type": "table",
|
2025-11-17 16:27:38 -03:00
|
|
|
"title": "Terminating >10m",
|
2025-11-17 14:22:46 -03:00
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 10,
|
|
|
|
|
"w": 24,
|
|
|
|
|
"x": 0,
|
2025-11-17 16:27:38 -03:00
|
|
|
"y": 24
|
2025-11-17 14:22:46 -03:00
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
2025-11-17 18:55:11 -03:00
|
|
|
"expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
|
2025-11-17 14:22:46 -03:00
|
|
|
"refId": "A"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"unit": "s"
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"showHeader": true
|
|
|
|
|
},
|
|
|
|
|
"transformations": [
|
|
|
|
|
{
|
|
|
|
|
"id": "labelsToFields",
|
|
|
|
|
"options": {}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": "filterByValue",
|
|
|
|
|
"options": {
|
|
|
|
|
"match": "Value",
|
|
|
|
|
"operator": "gt",
|
|
|
|
|
"value": 600
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
]
|
2025-12-12 18:32:45 -03:00
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 8,
|
|
|
|
|
"type": "piechart",
|
|
|
|
|
"title": "Pods by Node",
|
|
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 8,
|
|
|
|
|
"w": 12,
|
|
|
|
|
"x": 12,
|
|
|
|
|
"y": 34
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
|
|
|
|
"expr": "sum(kube_pod_info{pod!=\"\"}) by (node)",
|
|
|
|
|
"refId": "A",
|
|
|
|
|
"legendFormat": "{{node}}"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"color": {
|
|
|
|
|
"mode": "palette-classic"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"legend": {
|
|
|
|
|
"displayMode": "list",
|
|
|
|
|
"placement": "right"
|
|
|
|
|
},
|
|
|
|
|
"pieType": "pie",
|
|
|
|
|
"displayLabels": [
|
|
|
|
|
"percent"
|
|
|
|
|
],
|
|
|
|
|
"tooltip": {
|
|
|
|
|
"mode": "single"
|
|
|
|
|
},
|
|
|
|
|
"colorScheme": "interpolateSpectral",
|
|
|
|
|
"colorBy": "value",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-12-12 18:45:29 -03:00
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"id": 9,
|
|
|
|
|
"type": "bargauge",
|
|
|
|
|
"title": "Top Nodes by Pod Count",
|
|
|
|
|
"datasource": {
|
|
|
|
|
"type": "prometheus",
|
|
|
|
|
"uid": "atlas-vm"
|
|
|
|
|
},
|
|
|
|
|
"gridPos": {
|
|
|
|
|
"h": 8,
|
|
|
|
|
"w": 12,
|
|
|
|
|
"x": 0,
|
|
|
|
|
"y": 34
|
|
|
|
|
},
|
|
|
|
|
"targets": [
|
|
|
|
|
{
|
|
|
|
|
"expr": "topk(12, sum(kube_pod_info{pod!=\"\"}) by (node))",
|
|
|
|
|
"refId": "A",
|
|
|
|
|
"legendFormat": "{{node}}"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"fieldConfig": {
|
|
|
|
|
"defaults": {
|
|
|
|
|
"unit": "none",
|
|
|
|
|
"min": 0,
|
|
|
|
|
"max": null,
|
|
|
|
|
"thresholds": {
|
|
|
|
|
"mode": "absolute",
|
|
|
|
|
"steps": [
|
|
|
|
|
{
|
|
|
|
|
"color": "green",
|
|
|
|
|
"value": null
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "yellow",
|
|
|
|
|
"value": 50
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "orange",
|
|
|
|
|
"value": 70
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"color": "red",
|
|
|
|
|
"value": 85
|
|
|
|
|
}
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"overrides": []
|
|
|
|
|
},
|
|
|
|
|
"options": {
|
|
|
|
|
"displayMode": "gradient",
|
|
|
|
|
"orientation": "horizontal",
|
|
|
|
|
"reduceOptions": {
|
|
|
|
|
"calcs": [
|
|
|
|
|
"lastNotNull"
|
|
|
|
|
],
|
|
|
|
|
"fields": "",
|
|
|
|
|
"values": false
|
|
|
|
|
}
|
2025-12-12 18:51:43 -03:00
|
|
|
},
|
|
|
|
|
"transformations": [
|
|
|
|
|
{
|
|
|
|
|
"id": "sortBy",
|
|
|
|
|
"options": {
|
|
|
|
|
"fields": [
|
|
|
|
|
"Value"
|
|
|
|
|
],
|
|
|
|
|
"order": "desc"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
]
|
2025-11-17 14:22:46 -03:00
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"time": {
|
|
|
|
|
"from": "now-12h",
|
|
|
|
|
"to": "now"
|
|
|
|
|
},
|
|
|
|
|
"annotations": {
|
|
|
|
|
"list": []
|
|
|
|
|
},
|
|
|
|
|
"schemaVersion": 39,
|
|
|
|
|
"style": "dark",
|
|
|
|
|
"tags": [
|
|
|
|
|
"atlas",
|
|
|
|
|
"pods"
|
|
|
|
|
]
|
|
|
|
|
}
|