{
  "uid": "atlas-pods",
  "title": "Atlas Pods",
  "folderUid": "atlas-internal",
  "editable": true,
  "panels": [
    {
      "id": 1,
      "type": "stat",
      "title": "Problem Pods",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 2,
      "type": "stat",
      "title": "CrashLoop / ImagePull",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 3,
      "type": "stat",
      "title": "Stuck Terminating (>10m)",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
      "targets": [
        {
          "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0))) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 4,
      "type": "stat",
      "title": "Control Plane Workloads",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
      "targets": [
        {
          "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "thresholds" },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "red", "value": 1 }
            ]
          },
          "unit": "none",
          "custom": { "displayMode": "auto" }
        },
        "overrides": []
      },
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "center",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
        "textMode": "value"
      }
    },
    {
      "id": 5,
      "type": "table",
      "title": "Pods Not Running",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 4 },
      "targets": [
        {
          "expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "custom": { "filterable": true }
        },
        "overrides": []
      },
      "options": { "showHeader": true, "columnFilters": false },
      "transformations": [
        { "id": "labelsToFields", "options": {} }
      ]
    },
    {
      "id": 6,
      "type": "table",
      "title": "CrashLoop / ImagePull",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 14 },
      "targets": [
        {
          "expr": "(time() - kube_pod_created{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info * on(namespace,pod,container) group_left(reason) max by (namespace,pod,container,reason) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "custom": { "filterable": true }
        },
        "overrides": []
      },
      "options": { "showHeader": true, "columnFilters": false },
      "transformations": [
        { "id": "labelsToFields", "options": {} }
      ]
    },
    {
      "id": 7,
      "type": "table",
      "title": "Terminating >10m",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 10, "w": 24, "x": 0, "y": 24 },
      "targets": [
        {
          "expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
          "refId": "A"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "s",
          "custom": { "filterable": true }
        },
        "overrides": []
      },
      "options": { "showHeader": true, "columnFilters": false },
      "transformations": [
        { "id": "labelsToFields", "options": {} },
        {
          "id": "filterByValue",
          "options": { "match": "Value", "operator": "gt", "value": 600 }
        }
      ]
    },
    {
      "id": 8,
      "type": "piechart",
      "title": "Node Pod Share",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 34 },
      "targets": [
        {
          "expr": "(sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node) / on() group_left() clamp_min(sum(kube_pod_info{pod!=\"\" , node!=\"\"}), 1)) * 100",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "color": { "mode": "palette-classic" }
        },
        "overrides": []
      },
      "options": {
        "legend": { "displayMode": "list", "placement": "right" },
        "pieType": "pie",
        "displayLabels": [],
        "tooltip": { "mode": "single" },
        "colorScheme": "interpolateSpectral",
        "colorBy": "value",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
      }
    },
    {
      "id": 9,
      "type": "bargauge",
      "title": "Top Nodes by Pod Count",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 34 },
      "targets": [
        {
          "expr": "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))",
          "refId": "A",
          "legendFormat": "{{node}}",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "none",
          "min": 0,
          "max": null,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              { "color": "green", "value": null },
              { "color": "yellow", "value": 50 },
              { "color": "orange", "value": 75 },
              { "color": "red", "value": 100 }
            ]
          },
          "decimals": 0
        },
        "overrides": []
      },
      "options": {
        "displayMode": "gradient",
        "orientation": "horizontal",
        "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }
      },
      "transformations": [
        {
          "id": "sortBy",
          "options": { "fields": ["Value"], "order": "desc" }
        },
        { "id": "limit", "options": { "limit": 12 } }
      ]
    },
    {
      "id": 10,
      "type": "table",
      "title": "Namespace Plurality by Node v23",
      "datasource": { "type": "prometheus", "uid": "atlas-vm" },
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 42 },
      "targets": [
        {
          "expr": "(sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22)) == bool on(namespace) group_left() (max by (namespace) ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) + on(node) group_left() ((sum by (node) (kube_node_info{node=\"titan-0a\"}) * 1e-6 * 1) or (sum by (node) (kube_node_info{node=\"titan-0b\"}) * 1e-6 * 2) or (sum by (node) (kube_node_info{node=\"titan-0c\"}) * 1e-6 * 3) or (sum by (node) (kube_node_info{node=\"titan-db\"}) * 1e-6 * 4) or (sum by (node) (kube_node_info{node=\"titan-04\"}) * 1e-6 * 5) or (sum by (node) (kube_node_info{node=\"titan-05\"}) * 1e-6 * 6) or (sum by (node) (kube_node_info{node=\"titan-06\"}) * 1e-6 * 7) or (sum by (node) (kube_node_info{node=\"titan-07\"}) * 1e-6 * 8) or (sum by (node) (kube_node_info{node=\"titan-08\"}) * 1e-6 * 9) or (sum by (node) (kube_node_info{node=\"titan-09\"}) * 1e-6 * 10) or (sum by (node) (kube_node_info{node=\"titan-10\"}) * 1e-6 * 11) or (sum by (node) (kube_node_info{node=\"titan-11\"}) * 1e-6 * 12) or (sum by (node) (kube_node_info{node=\"titan-12\"}) * 1e-6 * 13) or (sum by (node) (kube_node_info{node=\"titan-13\"}) * 1e-6 * 14) or (sum by (node) (kube_node_info{node=\"titan-14\"}) * 1e-6 * 15) or (sum by (node) (kube_node_info{node=\"titan-15\"}) * 1e-6 * 16) or (sum by (node) (kube_node_info{node=\"titan-16\"}) * 1e-6 * 17) or (sum by (node) (kube_node_info{node=\"titan-17\"}) * 1e-6 * 18) or (sum by (node) (kube_node_info{node=\"titan-18\"}) * 1e-6 * 19) or (sum by (node) (kube_node_info{node=\"titan-19\"}) * 1e-6 * 20) or (sum by (node) (kube_node_info{node=\"titan-22\"}) * 1e-6 * 21) or (sum by (node) (kube_node_info{node=\"titan-24\"}) * 1e-6 * 22))))) * on(namespace,node) group_left() ((sum by (namespace,node) (kube_pod_info{pod!=\"\" , node!=\"\"}) / on(namespace) group_left() clamp_min(sum by (namespace) (kube_pod_info{pod!=\"\"}), 1) * 100) > bool 0)",
          "refId": "A",
          "instant": true,
          "format": "table"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "custom": { "filterable": false }
        },
        "overrides": []
      },
      "options": {
        "showHeader": true,
        "columnFilters": false,
        "showColumnFilters": false,
        "footer": { "show": false, "fields": "", "calcs": [] }
      },
      "transformations": [
        { "id": "labelsToFields", "options": {} },
        {
          "id": "organize",
          "options": { "excludeByName": { "Time": true } }
        },
        {
          "id": "sortBy",
          "options": { "fields": ["node", "Value"], "order": "asc" }
        }
      ]
    }
  ],
  "time": { "from": "now-12h", "to": "now" },
  "annotations": { "list": [] },
  "schemaVersion": 39,
  "style": "dark",
  "tags": ["atlas", "pods"]
}