{
"uid" : "atlas-overview" ,
"title" : "Atlas Overview" ,
"folderUid" : "overview" ,
"editable" : false ,
"annotations" : {
"list" : [ ]
} ,
"panels" : [
{
"id" : 2 ,
"type" : "gauge" ,
"title" : "Control Plane Ready" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 4 ,
"x" : 0 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"})" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"min" : 0 ,
"max" : 3 ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "red" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 3
}
]
}
} ,
"overrides" : [ ]
} ,
"options" : {
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"orientation" : "auto" ,
"showThresholdMarkers" : false ,
"showThresholdLabels" : false
}
} ,
{
"id" : 3 ,
"type" : "stat" ,
"title" : "Control Plane Workloads" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 4 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 1
} ,
{
"color" : "orange" ,
"value" : 2
} ,
{
"color" : "red" ,
"value" : 3
}
]
} ,
"unit" : "none" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-pods dashboard" ,
"url" : "/d/atlas-pods" ,
"targetBlank" : true
}
]
} ,
{
"id" : 5 ,
"type" : "stat" ,
"title" : "Stuck Terminating" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 7 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0))) or on() vector(0)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 1
} ,
{
"color" : "orange" ,
"value" : 2
} ,
{
"color" : "red" ,
"value" : 3
}
]
} ,
"unit" : "none" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-pods dashboard" ,
"url" : "/d/atlas-pods" ,
"targetBlank" : true
}
]
} ,
{
"id" : 27 ,
"type" : "stat" ,
"title" : "Atlas Availability" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 4 ,
"x" : 10 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "avg_over_time((min(((sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-0a|titan-0b|titan-0c\"}) / 3)), ((sum(kube_deployment_status_replicas_available{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}) / clamp_min(sum(kube_deployment_spec_replicas{namespace=~\"traefik|kube-system\",deployment=\"traefik\"}), 1)))))[365d:1h])" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "red" ,
"value" : null
} ,
{
"color" : "orange" ,
"value" : 0.99
} ,
{
"color" : "yellow" ,
"value" : 0.999
} ,
{
"color" : "green" ,
"value" : 0.9999
} ,
{
"color" : "blue" ,
"value" : 0.99999
}
]
} ,
"unit" : "percentunit" ,
"custom" : {
"displayMode" : "auto"
} ,
"decimals" : 4
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
}
} ,
{
"id" : 4 ,
"type" : "stat" ,
"title" : "Problem Pods" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 14 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(max by (namespace,pod) (kube_pod_status_phase{phase!~\"Running|Succeeded\"})) or on() vector(0)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 1
} ,
{
"color" : "orange" ,
"value" : 2
} ,
{
"color" : "red" ,
"value" : 3
}
]
} ,
"unit" : "none" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-pods dashboard" ,
"url" : "/d/atlas-pods" ,
"targetBlank" : true
}
]
} ,
{
"id" : 6 ,
"type" : "stat" ,
"title" : "CrashLoop / ImagePull" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 17 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(max by (namespace,pod) (kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})) or on() vector(0)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 1
} ,
{
"color" : "orange" ,
"value" : 2
} ,
{
"color" : "red" ,
"value" : 3
}
]
} ,
"unit" : "none" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-pods dashboard" ,
"url" : "/d/atlas-pods" ,
"targetBlank" : true
}
]
} ,
{
"id" : 1 ,
"type" : "gauge" ,
"title" : "Workers Ready" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 5 ,
"w" : 4 ,
"x" : 20 ,
"y" : 0
} ,
"targets" : [
{
"expr" : "sum(kube_node_status_condition{condition=\"Ready\",status=\"true\",node=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"})" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"min" : 0 ,
"max" : 20 ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "red" ,
"value" : null
} ,
{
"color" : "orange" ,
"value" : 18
} ,
{
"color" : "yellow" ,
"value" : 19
} ,
{
"color" : "green" ,
"value" : 20
}
]
}
} ,
"overrides" : [ ]
} ,
"options" : {
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"orientation" : "auto" ,
"showThresholdMarkers" : false ,
"showThresholdLabels" : false
}
} ,
{
"id" : 7 ,
"type" : "stat" ,
"title" : "Hottest node: CPU" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 0 ,
"y" : 5
} ,
"targets" : [
{
"expr" : "label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")" ,
"refId" : "A" ,
"legendFormat" : "{{node}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 50
} ,
{
"color" : "orange" ,
"value" : 75
} ,
{
"color" : "red" ,
"value" : 91.5
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "name_and_value"
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
} ,
{
"id" : 8 ,
"type" : "stat" ,
"title" : "Hottest node: RAM" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 6 ,
"y" : 5
} ,
"targets" : [
{
"expr" : "label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")" ,
"refId" : "A" ,
"legendFormat" : "{{node}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 50
} ,
{
"color" : "orange" ,
"value" : 75
} ,
{
"color" : "red" ,
"value" : 91.5
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "name_and_value"
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
} ,
{
"id" : 9 ,
"type" : "stat" ,
"title" : "Hottest node: NET (rx+tx)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 12 ,
"y" : 5
} ,
"targets" : [
{
"expr" : "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{device!~\"lo\"}[5m]) + rate(node_network_transmit_bytes_total{device!~\"lo\"}[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")" ,
"refId" : "A" ,
"legendFormat" : "{{node}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "rgba(115, 115, 115, 1)" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 1
}
]
} ,
"unit" : "Bps" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "name_and_value"
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
} ,
{
"id" : 10 ,
"type" : "stat" ,
"title" : "Hottest node: I/O (r+w)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 18 ,
"y" : 5
} ,
"targets" : [
{
"expr" : "label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m]))) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))), \"__name__\", \"$1\", \"node\", \"(.*)\")" ,
"refId" : "A" ,
"legendFormat" : "{{node}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "rgba(115, 115, 115, 1)" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 1
}
]
} ,
"unit" : "Bps" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "name_and_value"
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
} ,
{
"id" : 30 ,
"type" : "stat" ,
"title" : "Mail Sent (1d)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 0 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "max(postmark_outbound_sent{window=\"1d\"})" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "rgba(115, 115, 115, 1)" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 1
}
]
} ,
"unit" : "none" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-mail dashboard" ,
"url" : "/d/atlas-mail" ,
"targetBlank" : true
}
]
} ,
{
"id" : 31 ,
"type" : "stat" ,
"title" : "Mail Bounces (1d)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 12 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "max(postmark_outbound_bounce_rate{window=\"1d\"})" ,
"refId" : "A" ,
"legendFormat" : "Rate"
} ,
{
"expr" : "max(postmark_outbound_bounced{window=\"1d\"})" ,
"refId" : "B" ,
"legendFormat" : "Count"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"custom" : {
"displayMode" : "auto"
} ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 5
} ,
{
"color" : "orange" ,
"value" : 8
} ,
{
"color" : "red" ,
"value" : 10
}
]
} ,
"unit" : "none"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "Rate"
} ,
"properties" : [
{
"id" : "unit" ,
"value" : "percent"
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "Count"
} ,
"properties" : [
{
"id" : "unit" ,
"value" : "none"
}
]
}
]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "name_and_value"
} ,
"links" : [
{
"title" : "Open atlas-mail dashboard" ,
"url" : "/d/atlas-mail" ,
"targetBlank" : true
}
]
} ,
{
"id" : 32 ,
"type" : "stat" ,
"title" : "Mail Success Rate (1d)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 6 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "clamp_min(100 - max(postmark_outbound_bounce_rate{window=\"1d\"}), 0)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "red" ,
"value" : null
} ,
{
"color" : "orange" ,
"value" : 90
} ,
{
"color" : "yellow" ,
"value" : 95
} ,
{
"color" : "green" ,
"value" : 98
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
} ,
"decimals" : 1
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-mail dashboard" ,
"url" : "/d/atlas-mail" ,
"targetBlank" : true
}
]
} ,
{
"id" : 33 ,
"type" : "stat" ,
"title" : "Mail Limit Used (30d)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 18 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "max(postmark_sending_limit_used_percent)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 70
} ,
{
"color" : "orange" ,
"value" : 85
} ,
{
"color" : "red" ,
"value" : 95
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
} ,
"decimals" : 1
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-mail dashboard" ,
"url" : "/d/atlas-mail" ,
"targetBlank" : true
}
]
} ,
{
"id" : 23 ,
"type" : "stat" ,
"title" : "Astreae Usage" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 0 ,
"y" : 11
} ,
"targets" : [
{
"expr" : "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"}) * 100)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 50
} ,
{
"color" : "orange" ,
"value" : 75
} ,
{
"color" : "red" ,
"value" : 91.5
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
]
} ,
{
"id" : 24 ,
"type" : "stat" ,
"title" : "Asteria Usage" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 6 ,
"y" : 11
} ,
"targets" : [
{
"expr" : "100 - (sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) / sum(node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"}) * 100)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 50
} ,
{
"color" : "orange" ,
"value" : 75
} ,
{
"color" : "red" ,
"value" : 91.5
}
]
} ,
"unit" : "percent" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
]
} ,
{
"id" : 25 ,
"type" : "stat" ,
"title" : "Astreae Free" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 12 ,
"y" : 11
} ,
"targets" : [
{
"expr" : "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "rgba(115, 115, 115, 1)" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 1
}
]
} ,
"unit" : "decbytes" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
]
} ,
{
"id" : 26 ,
"type" : "stat" ,
"title" : "Asteria Free" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 3 ,
"w" : 6 ,
"x" : 18 ,
"y" : 11
} ,
"targets" : [
{
"expr" : "sum(node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "rgba(115, 115, 115, 1)" ,
"value" : null
} ,
{
"color" : "green" ,
"value" : 1
}
]
} ,
"unit" : "decbytes" ,
"custom" : {
"displayMode" : "auto"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "center" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"textMode" : "value"
} ,
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
]
} ,
{
"id" : 40 ,
"type" : "bargauge" ,
"title" : "One-off Job Pods (age hours)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 6 ,
"w" : 6 ,
"x" : 0 ,
"y" : 14
} ,
"targets" : [
{
"expr" : "((time() - kube_pod_start_time{pod!=\"\"}) / 3600) * on(namespace,pod) group_left(owner_name) (kube_pod_owner{owner_kind=\"Job\"} unless on(namespace, owner_name) label_replace(kube_job_owner{owner_kind=\"CronJob\"}, \"owner_name\", \"$1\", \"job_name\", \"(.*)\")) * on(namespace,pod) group_left(phase) max by (namespace,pod,phase) (kube_pod_status_phase{phase=~\"Running|Succeeded\"})" ,
"refId" : "A" ,
"legendFormat" : "{{namespace}}/{{pod}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "h" ,
"min" : 0 ,
"max" : null ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 6
} ,
{
"color" : "orange" ,
"value" : 24
} ,
{
"color" : "red" ,
"value" : 48
}
]
} ,
"decimals" : 2
} ,
"overrides" : [ ]
} ,
"options" : {
"displayMode" : "gradient" ,
"orientation" : "horizontal" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
} ,
"transformations" : [
{
"id" : "sortBy" ,
"options" : {
"fields" : [
"Value"
] ,
"order" : "desc"
}
} ,
{
"id" : "limit" ,
"options" : {
"limit" : 8
}
}
]
} ,
{
"id" : 41 ,
"type" : "timeseries" ,
2026-01-21 14:30:55 -03:00
"title" : "Ariadne Attempts / Warnings / Failures" ,
2026-01-21 13:37:36 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 6 ,
2026-01-21 14:30:55 -03:00
"w" : 6 ,
"x" : 6 ,
"y" : 14
2026-01-21 13:37:36 -03:00
} ,
"targets" : [
{
2026-01-21 14:30:55 -03:00
"expr" : "sum(increase(ariadne_task_runs_total[$__interval]))" ,
2026-01-21 13:37:36 -03:00
"refId" : "A" ,
"legendFormat" : "Attempts"
} ,
{
2026-01-21 14:30:55 -03:00
"expr" : "sum(increase(ariadne_task_runs_total{status!~\"ok|error\"}[$__interval])) or on() vector(0)" ,
2026-01-21 13:37:36 -03:00
"refId" : "B" ,
2026-01-21 14:30:55 -03:00
"legendFormat" : "Warnings"
} ,
{
"expr" : "sum(increase(ariadne_task_runs_total{status=\"error\"}[$__interval]))" ,
"refId" : "C" ,
2026-01-21 13:37:36 -03:00
"legendFormat" : "Failures"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "none"
} ,
2026-01-21 14:30:55 -03:00
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "Warnings"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"mode" : "fixed" ,
"fixedColor" : "yellow"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "Failures"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"mode" : "fixed" ,
"fixedColor" : "red"
}
}
]
}
]
2026-01-21 13:37:36 -03:00
} ,
"options" : {
"legend" : {
"displayMode" : "table" ,
"placement" : "right"
} ,
"tooltip" : {
"mode" : "multi"
}
}
} ,
{
"id" : 42 ,
"type" : "timeseries" ,
"title" : "Ariadne Test Success Rate" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 6 ,
2026-01-21 14:30:55 -03:00
"w" : 6 ,
2026-01-21 13:37:36 -03:00
"x" : 12 ,
2026-01-21 14:30:55 -03:00
"y" : 14
2026-01-21 13:37:36 -03:00
} ,
"targets" : [
{
"expr" : "100 * sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=\"passed\"}[1h])) / clamp_min(sum(max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"passed|failed|error\"}[1h])), 1)" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
2026-01-21 15:01:02 -03:00
"unit" : "percent" ,
"max" : 100
2026-01-21 13:37:36 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "bottom"
} ,
"tooltip" : {
"mode" : "multi"
}
}
} ,
{
"id" : 43 ,
"type" : "bargauge" ,
"title" : "Tests with Failures (24h)" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 6 ,
2026-01-21 14:30:55 -03:00
"w" : 6 ,
"x" : 18 ,
"y" : 14
2026-01-21 13:37:36 -03:00
} ,
"targets" : [
{
"expr" : "sum by (result) (max_over_time(ariadne_ci_tests_total{repo=\"ariadne\",result=~\"failed|error\"}[24h]))" ,
"refId" : "A" ,
"legendFormat" : "{{result}}" ,
"instant" : true
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "none" ,
"min" : 0 ,
"max" : null ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 1
} ,
{
"color" : "orange" ,
"value" : 5
} ,
{
"color" : "red" ,
"value" : 10
}
]
}
} ,
2026-01-21 15:01:02 -03:00
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "error"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"mode" : "fixed" ,
"fixedColor" : "yellow"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "failed"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"mode" : "fixed" ,
"fixedColor" : "red"
}
}
]
}
]
2026-01-21 13:37:36 -03:00
} ,
"options" : {
"displayMode" : "gradient" ,
"orientation" : "horizontal" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
} ,
"transformations" : [
{
"id" : "sortBy" ,
"options" : {
"fields" : [
"Value"
] ,
"order" : "desc"
}
}
]
} ,
2025-11-18 14:08:33 -03:00
{
"id" : 11 ,
"type" : "piechart" ,
2025-12-02 14:41:39 -03:00
"title" : "Namespace CPU Share" ,
2025-11-18 14:08:33 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 9 ,
"w" : 8 ,
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 20
2025-11-18 14:08:33 -03:00
} ,
"targets" : [
{
2026-01-05 13:30:33 -03:00
"expr" : "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_cpu}[1m])) by (namespace) ), 1)" ,
2025-11-18 14:08:33 -03:00
"refId" : "A" ,
"legendFormat" : "{{namespace}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent" ,
"color" : {
"mode" : "palette-classic"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "right"
} ,
"pieType" : "pie" ,
2025-12-12 20:40:32 -03:00
"displayLabels" : [ ] ,
2025-11-18 14:08:33 -03:00
"tooltip" : {
"mode" : "single"
} ,
"colorScheme" : "interpolateSpectral" ,
"colorBy" : "value" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
2026-01-01 14:44:33 -03:00
} ,
"links" : [
{
"title" : "Workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
} ,
{
"title" : "All namespaces" ,
"url" : "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}" ,
"targetBlank" : false
} ,
{
"title" : "Infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
}
] ,
2026-01-18 02:50:07 -03:00
"description" : "Shares are normalized within the selected filter. Switching scope changes the denominator."
2025-11-18 14:08:33 -03:00
} ,
{
"id" : 12 ,
"type" : "piechart" ,
2025-12-02 14:41:39 -03:00
"title" : "Namespace GPU Share" ,
2025-11-18 14:08:33 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 9 ,
"w" : 8 ,
"x" : 8 ,
2026-01-21 14:30:55 -03:00
"y" : 20
2025-11-18 14:08:33 -03:00
} ,
"targets" : [
{
2026-01-01 14:44:33 -03:00
"expr" : "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))" ,
2025-11-18 14:08:33 -03:00
"refId" : "A" ,
"legendFormat" : "{{namespace}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent" ,
"color" : {
"mode" : "palette-classic"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "right"
} ,
"pieType" : "pie" ,
2025-12-12 20:40:32 -03:00
"displayLabels" : [ ] ,
2025-11-18 14:08:33 -03:00
"tooltip" : {
"mode" : "single"
} ,
"colorScheme" : "interpolateSpectral" ,
"colorBy" : "value" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
2026-01-01 14:44:33 -03:00
} ,
"links" : [
{
"title" : "Workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
} ,
{
"title" : "All namespaces" ,
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}" ,
"targetBlank" : false
} ,
{
"title" : "Infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
}
] ,
2026-01-18 02:50:07 -03:00
"description" : "Shares are normalized within the selected filter. Switching scope changes the denominator."
2025-11-18 14:08:33 -03:00
} ,
{
"id" : 13 ,
"type" : "piechart" ,
2025-12-02 14:41:39 -03:00
"title" : "Namespace RAM Share" ,
2025-11-18 14:08:33 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 9 ,
"w" : 8 ,
"x" : 16 ,
2026-01-21 14:30:55 -03:00
"y" : 20
2025-11-18 14:08:33 -03:00
} ,
"targets" : [
{
2026-01-05 13:30:33 -03:00
"expr" : "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",container!=\"POD\",$namespace_scope_ram}) by (namespace) ), 1)" ,
2025-11-18 14:08:33 -03:00
"refId" : "A" ,
"legendFormat" : "{{namespace}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent" ,
"color" : {
"mode" : "palette-classic"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "right"
} ,
"pieType" : "pie" ,
2025-12-12 20:40:32 -03:00
"displayLabels" : [ ] ,
2025-11-18 14:08:33 -03:00
"tooltip" : {
"mode" : "single"
} ,
"colorScheme" : "interpolateSpectral" ,
"colorBy" : "value" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
2026-01-01 14:44:33 -03:00
} ,
"links" : [
{
"title" : "Workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
} ,
{
"title" : "All namespaces" ,
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22" ,
"targetBlank" : false
} ,
{
"title" : "Infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"url" : "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22" ,
2026-01-01 14:44:33 -03:00
"targetBlank" : false
}
] ,
2026-01-18 02:50:07 -03:00
"description" : "Shares are normalized within the selected filter. Switching scope changes the denominator."
2025-11-18 14:08:33 -03:00
} ,
{
"id" : 14 ,
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Worker Node CPU" ,
2025-11-18 14:08:33 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 12 ,
2025-11-18 14:08:33 -03:00
"w" : 12 ,
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 36
2025-11-18 14:08:33 -03:00
} ,
"targets" : [
{
2026-01-21 14:30:55 -03:00
"expr" : "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")" ,
2025-11-18 14:08:33 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent"
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "table" ,
"placement" : "right" ,
"calcs" : [
"last"
]
} ,
"tooltip" : {
"mode" : "multi"
}
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
2025-11-17 16:27:38 -03:00
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 15 ,
2025-11-17 14:22:46 -03:00
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Worker Node RAM" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 12 ,
2025-11-17 14:22:46 -03:00
"w" : 12 ,
"x" : 12 ,
2026-01-21 14:30:55 -03:00
"y" : 36
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2026-01-21 14:30:55 -03:00
"expr" : "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-04|titan-05|titan-06|titan-07|titan-08|titan-09|titan-10|titan-11|titan-20|titan-21|titan-12|titan-13|titan-14|titan-15|titan-16|titan-17|titan-18|titan-19|titan-22|titan-24\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")" ,
2025-11-17 14:22:46 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent"
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "table" ,
"placement" : "right" ,
"calcs" : [
"last"
]
} ,
"tooltip" : {
"mode" : "multi"
}
2025-11-17 16:27:38 -03:00
} ,
"links" : [
{
"title" : "Open atlas-nodes dashboard" ,
"url" : "/d/atlas-nodes" ,
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 16 ,
2025-11-17 16:27:38 -03:00
"type" : "timeseries" ,
2025-11-17 21:48:12 -03:00
"title" : "Control Plane CPU" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 10 ,
2025-11-17 14:22:46 -03:00
"w" : 12 ,
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 48
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2026-01-06 09:50:40 -03:00
"expr" : "(avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")" ,
2025-11-17 16:27:38 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-17 16:27:38 -03:00
"unit" : "percent"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
2025-11-17 16:27:38 -03:00
"legend" : {
"displayMode" : "table" ,
"placement" : "right"
} ,
"tooltip" : {
"mode" : "multi"
2025-11-17 14:22:46 -03:00
}
2025-11-17 16:27:38 -03:00
}
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 17 ,
2025-11-17 16:27:38 -03:00
"type" : "timeseries" ,
2025-11-17 21:48:12 -03:00
"title" : "Control Plane RAM" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 10 ,
2025-11-17 14:22:46 -03:00
"w" : 12 ,
"x" : 12 ,
2026-01-21 14:30:55 -03:00
"y" : 48
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2026-01-06 09:50:40 -03:00
"expr" : "(avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-0a|titan-0b|titan-0c|titan-db|titan-jh\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")" ,
2025-11-17 16:27:38 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-17 16:27:38 -03:00
"unit" : "percent"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
2025-11-17 16:27:38 -03:00
"legend" : {
"displayMode" : "table" ,
"placement" : "right"
2025-11-17 14:22:46 -03:00
} ,
2025-11-17 16:27:38 -03:00
"tooltip" : {
"mode" : "multi"
2025-11-17 14:22:46 -03:00
}
2025-11-17 16:27:38 -03:00
}
2025-11-17 14:22:46 -03:00
} ,
2025-12-12 18:51:43 -03:00
{
"id" : 28 ,
"type" : "piechart" ,
2025-12-12 20:30:00 -03:00
"title" : "Node Pod Share" ,
2025-12-12 18:51:43 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 10 ,
"w" : 12 ,
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 58
2025-12-12 18:51:43 -03:00
} ,
"targets" : [
{
2025-12-12 20:40:32 -03:00
"expr" : "(sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node) / clamp_min(sum(kube_pod_info{pod!=\"\" , node!=\"\"}), 1)) * 100" ,
2025-12-12 18:51:43 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent" ,
"color" : {
"mode" : "palette-classic"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "right"
} ,
"pieType" : "pie" ,
2025-12-12 20:40:32 -03:00
"displayLabels" : [ ] ,
2025-12-12 18:51:43 -03:00
"tooltip" : {
"mode" : "single"
} ,
"colorScheme" : "interpolateSpectral" ,
"colorBy" : "value" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
}
} ,
{
"id" : 29 ,
"type" : "bargauge" ,
"title" : "Top Nodes by Pod Count" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 10 ,
"w" : 12 ,
"x" : 12 ,
2026-01-21 14:30:55 -03:00
"y" : 58
2025-12-12 18:51:43 -03:00
} ,
"targets" : [
{
2025-12-12 19:09:51 -03:00
"expr" : "topk(12, sum(kube_pod_info{pod!=\"\" , node!=\"\"}) by (node))" ,
2025-12-12 18:51:43 -03:00
"refId" : "A" ,
2025-12-12 20:30:00 -03:00
"legendFormat" : "{{node}}" ,
"instant" : true
2025-12-12 18:51:43 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "none" ,
"min" : 0 ,
"max" : null ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
"value" : 50
} ,
{
"color" : "orange" ,
2025-12-12 20:20:13 -03:00
"value" : 75
2025-12-12 18:51:43 -03:00
} ,
{
"color" : "red" ,
2025-12-12 20:20:13 -03:00
"value" : 100
2025-12-12 18:51:43 -03:00
}
]
2025-12-12 20:20:13 -03:00
} ,
"decimals" : 0
2025-12-12 18:51:43 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
"displayMode" : "gradient" ,
"orientation" : "horizontal" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
} ,
"transformations" : [
{
"id" : "sortBy" ,
"options" : {
"fields" : [
"Value"
] ,
"order" : "desc"
}
2025-12-12 18:56:13 -03:00
} ,
{
"id" : "limit" ,
"options" : {
"limit" : 12
}
2025-12-12 18:51:43 -03:00
}
]
} ,
2025-11-17 14:22:46 -03:00
{
2025-11-18 00:11:39 -03:00
"id" : 18 ,
2025-11-17 14:22:46 -03:00
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Cluster Ingress Throughput" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 7 ,
2025-11-18 14:08:33 -03:00
"w" : 8 ,
2025-11-17 14:22:46 -03:00
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 29
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2025-11-18 16:18:52 -03:00
"expr" : "sum(rate(node_network_receive_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)" ,
2025-11-18 00:11:39 -03:00
"refId" : "A" ,
2025-11-18 14:08:33 -03:00
"legendFormat" : "Ingress (node NICs)"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-17 18:55:11 -03:00
"unit" : "Bps"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
2025-11-17 16:27:38 -03:00
"displayMode" : "list" ,
2025-11-17 14:22:46 -03:00
"placement" : "bottom"
} ,
"tooltip" : {
"mode" : "multi"
}
2025-11-17 16:27:38 -03:00
} ,
"links" : [
{
"title" : "Open atlas-network dashboard" ,
"url" : "/d/atlas-network" ,
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 19 ,
2025-11-17 14:22:46 -03:00
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Cluster Egress Throughput" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 7 ,
2025-11-18 14:08:33 -03:00
"w" : 8 ,
"x" : 8 ,
2026-01-21 14:30:55 -03:00
"y" : 29
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2025-11-18 16:18:52 -03:00
"expr" : "sum(rate(node_network_transmit_bytes_total{device!~\"lo|cni.*|veth.*|flannel.*|docker.*|virbr.*|vxlan.*|wg.*\"}[5m])) or on() vector(0)" ,
2025-11-18 00:11:39 -03:00
"refId" : "A" ,
2025-11-18 14:08:33 -03:00
"legendFormat" : "Egress (node NICs)"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-17 18:55:11 -03:00
"unit" : "Bps"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
2025-11-17 16:27:38 -03:00
"displayMode" : "list" ,
2025-11-17 14:22:46 -03:00
"placement" : "bottom"
} ,
"tooltip" : {
"mode" : "multi"
}
2025-11-17 16:27:38 -03:00
} ,
"links" : [
{
"title" : "Open atlas-network dashboard" ,
"url" : "/d/atlas-network" ,
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 20 ,
2025-11-17 14:22:46 -03:00
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Intra-Cluster Throughput" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-11-18 14:08:33 -03:00
"h" : 7 ,
"w" : 8 ,
"x" : 16 ,
2026-01-21 14:30:55 -03:00
"y" : 29
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2025-11-18 17:09:13 -03:00
"expr" : "sum(rate(container_network_receive_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"traefik\",pod!=\"\"}[5m])) or on() vector(0)" ,
2025-11-17 14:22:46 -03:00
"refId" : "A" ,
2025-11-18 14:08:33 -03:00
"legendFormat" : "Internal traffic"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-18 14:08:33 -03:00
"unit" : "Bps"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
2025-11-18 14:08:33 -03:00
"displayMode" : "list" ,
"placement" : "bottom"
2025-11-17 14:22:46 -03:00
} ,
"tooltip" : {
"mode" : "multi"
}
} ,
2025-11-17 16:27:38 -03:00
"links" : [
{
2025-11-18 14:08:33 -03:00
"title" : "Open atlas-network dashboard" ,
"url" : "/d/atlas-network" ,
2025-11-17 16:27:38 -03:00
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 00:11:39 -03:00
"id" : 21 ,
2025-11-18 14:08:33 -03:00
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "Root Filesystem Usage" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 16 ,
2025-11-17 14:22:46 -03:00
"w" : 12 ,
2025-11-18 14:08:33 -03:00
"x" : 0 ,
2026-01-21 14:30:55 -03:00
"y" : 68
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2025-11-18 14:08:33 -03:00
"expr" : "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))" ,
2025-11-18 00:11:39 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-18 14:08:33 -03:00
"unit" : "percent"
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
2025-11-18 14:08:33 -03:00
"legend" : {
"displayMode" : "table" ,
"placement" : "right" ,
2025-11-17 14:22:46 -03:00
"calcs" : [
2025-11-18 14:08:33 -03:00
"last"
]
} ,
"tooltip" : {
"mode" : "multi"
2025-11-17 14:22:46 -03:00
}
2025-11-17 16:27:38 -03:00
} ,
2025-11-18 14:08:33 -03:00
"timeFrom" : "30d" ,
2025-11-17 16:27:38 -03:00
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
]
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 14:08:33 -03:00
"id" : 22 ,
"type" : "bargauge" ,
2025-12-02 14:41:39 -03:00
"title" : "Nodes Closest to Full Root Disks" ,
2025-11-17 14:22:46 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
2025-12-02 15:15:21 -03:00
"h" : 16 ,
2025-11-18 14:08:33 -03:00
"w" : 12 ,
"x" : 12 ,
2026-01-21 14:30:55 -03:00
"y" : 68
2025-11-17 14:22:46 -03:00
} ,
"targets" : [
{
2025-12-02 15:21:02 -03:00
"expr" : "topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))" ,
2025-11-18 14:08:33 -03:00
"refId" : "A" ,
"legendFormat" : "{{node}}"
2025-11-17 14:22:46 -03:00
}
] ,
"fieldConfig" : {
"defaults" : {
2025-11-18 14:08:33 -03:00
"unit" : "percent" ,
"min" : 0 ,
"max" : 100 ,
2025-11-17 14:22:46 -03:00
"thresholds" : {
2025-11-18 14:08:33 -03:00
"mode" : "absolute" ,
2025-11-17 14:22:46 -03:00
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "yellow" ,
2025-11-18 14:08:33 -03:00
"value" : 50
2025-11-17 14:22:46 -03:00
} ,
{
2025-11-18 14:08:33 -03:00
"color" : "orange" ,
2025-12-12 21:13:31 -03:00
"value" : 75
2025-11-17 14:22:46 -03:00
} ,
{
"color" : "red" ,
2025-12-12 21:13:31 -03:00
"value" : 91.5
2025-11-17 14:22:46 -03:00
}
]
2025-11-17 16:27:38 -03:00
}
2025-11-17 14:22:46 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
2025-11-18 14:08:33 -03:00
"displayMode" : "gradient" ,
"orientation" : "horizontal" ,
2025-11-17 14:22:46 -03:00
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
2025-12-02 14:56:36 -03:00
"fields" : "" ,
2025-11-17 14:22:46 -03:00
"values" : false
}
2025-11-17 16:27:38 -03:00
} ,
"links" : [
{
"title" : "Open atlas-storage dashboard" ,
"url" : "/d/atlas-storage" ,
"targetBlank" : true
}
2025-12-12 18:51:43 -03:00
] ,
"transformations" : [
{
"id" : "sortBy" ,
"options" : {
"fields" : [
"Value"
] ,
"order" : "desc"
}
}
2025-11-17 14:22:46 -03:00
]
}
] ,
"schemaVersion" : 39 ,
"style" : "dark" ,
"tags" : [
"atlas" ,
"overview"
] ,
"templating" : {
2026-01-01 14:16:08 -03:00
"list" : [
{
2026-01-01 14:44:33 -03:00
"name" : "namespace_scope_cpu" ,
"label" : "CPU namespace filter" ,
"type" : "custom" ,
2026-01-18 02:50:07 -03:00
"query" : "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"current" : {
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : true
} ,
"options" : [
{
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : true
} ,
{
"text" : "all namespaces" ,
"value" : "namespace=~\".*\"" ,
"selected" : false
} ,
{
"text" : "infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : false
}
] ,
"hide" : 2 ,
"multi" : false ,
"includeAll" : false ,
"refresh" : 1 ,
"sort" : 0 ,
"skipUrlSync" : false
} ,
{
"name" : "namespace_scope_gpu" ,
"label" : "GPU namespace filter" ,
"type" : "custom" ,
2026-01-18 02:50:07 -03:00
"query" : "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"current" : {
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : true
} ,
"options" : [
{
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : true
} ,
{
"text" : "all namespaces" ,
"value" : "namespace=~\".*\"" ,
"selected" : false
} ,
{
"text" : "infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:44:33 -03:00
"selected" : false
}
] ,
"hide" : 2 ,
"multi" : false ,
"includeAll" : false ,
"refresh" : 1 ,
"sort" : 0 ,
"skipUrlSync" : false
} ,
{
"name" : "namespace_scope_ram" ,
"label" : "RAM namespace filter" ,
2026-01-01 14:16:08 -03:00
"type" : "custom" ,
2026-01-18 02:50:07 -03:00
"query" : "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:16:08 -03:00
"current" : {
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:16:08 -03:00
"selected" : true
} ,
"options" : [
{
"text" : "workload namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:16:08 -03:00
"selected" : true
} ,
{
"text" : "all namespaces" ,
"value" : "namespace=~\".*\"" ,
"selected" : false
} ,
{
"text" : "infrastructure namespaces only" ,
2026-01-18 02:50:07 -03:00
"value" : "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"" ,
2026-01-01 14:16:08 -03:00
"selected" : false
}
] ,
2026-01-01 14:44:33 -03:00
"hide" : 2 ,
2026-01-01 14:16:08 -03:00
"multi" : false ,
"includeAll" : false ,
"refresh" : 1 ,
"sort" : 0 ,
"skipUrlSync" : false
}
]
2025-11-17 14:22:46 -03:00
} ,
"time" : {
2025-12-02 14:41:39 -03:00
"from" : "now-1h" ,
2025-11-17 14:22:46 -03:00
"to" : "now"
2025-11-17 16:27:38 -03:00
} ,
2025-12-02 14:41:39 -03:00
"refresh" : "1m" ,
2025-12-12 18:32:45 -03:00
"links" : [ ]
2025-11-17 14:22:46 -03:00
}