{
  "uid": "atlas-pods",
  "title": "Atlas Pods",
  "folderUid": "atlas-internal",
  "editable": true,
  "panels": [
    {
      "id": 1,
      "type": "stat",
      "title": "Problem Pods",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 2,
      "type": "stat",
      "title": "CrashLoop / ImagePull",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 3,
      "type": "stat",
      "title": "Stuck Terminating (>10m)",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0))) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 4,
      "type": "stat",
      "title": "Control Plane Workloads",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
      "targets": [
        {
          "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 5,
      "type": "table",
      "title": "Pods Not Running",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 4 },
      "targets": [
        {
          "expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase!~\"Running|Succeeded\"} > 0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "s" },
        "overrides": []
      },
      "options": { "showHeader": true },
      "transformations": [
        { "id": "labelsToFields", "options": {} }
      ]
    },
    {
      "id": 6,
      "type": "table",
      "title": "CrashLoop / ImagePull",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
      "targets": [
        {
          "expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod,container) group_left(reason) max by (namespace,pod,container,reason) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "s" },
        "overrides": []
      },
      "options": { "showHeader": true },
      "transformations": [
        { "id": "labelsToFields", "options": {} }
      ]
    },
    {
      "id": 7,
      "type": "table",
      "title": "Terminating >10m",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 24 },
      "targets": [
        {
          "expr": "((((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": { "unit": "s" },
        "overrides": []
      },
      "options": { "showHeader": true },
      "transformations": [
        { "id": "labelsToFields", "options": {} },
        {
          "id": "filterByValue",
          "options": { "match": "Value", "operator": "gt", "value": 600 }
        }
      ]
    },
    {
      "id": 8,
      "type": "piechart",
      "title": "Node Pod Share",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 34 },
      "targets": [
        {
          "expr": "sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node)",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "none",
          "color": { "mode": "palette-classic" }
        },
        "overrides": []
      },
      "options": {
        "legend": { "displayMode": "list", "placement": "right" },
        "pieType": "pie",
        "displayLabels": ["name", "percent"],
        "tooltip": { "mode": "single" },
        "colorScheme": "interpolateSpectral",
        "colorBy": "value",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
      }
    },
    {
      "id": 9,
      "type": "bargauge",
      "title": "Top Nodes by Pod Count",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 34 },
      "targets": [
        {
          "expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
          "refId": "A",
          "legendFormat": "{{node}}",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "none",
          "min": 0,
          "max": null,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 50 },
              { "color": "orange", "value": 75 },
              { "color": "red", "value": 100 }
            ]
          },
          "decimals": 0
        },
        "overrides": []
      },
      "options": {
        "displayMode": "gradient",
        "orientation": "horizontal",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
      },
      "transformations": [
        {
          "id": "sortBy",
          "options": { "fields": ["Value"], "order": "desc" }
        },
        {
          "id": "limit",
          "options": { "limit": 12 }
        }
      ]
    }
  ],
  "time": { "from": "now-12h", "to": "now" },
  "annotations": { "list": [] },
  "schemaVersion": 39,
  "style": "dark",
  "tags": ["atlas", "pods"]
}