2025-12-02 13:16:00 -03:00
{
"uid" : "atlas-gpu" ,
"title" : "Atlas GPU" ,
"folderUid" : "atlas-internal" ,
"editable" : true ,
"panels" : [
{
"id" : 1 ,
"type" : "piechart" ,
2025-12-02 14:41:39 -03:00
"title" : "Namespace GPU Share" ,
2025-12-02 13:16:00 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 0 ,
"y" : 0
} ,
"targets" : [
{
2026-01-01 14:16:08 -03:00
"expr" : "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0)" ,
2025-12-02 13:16:00 -03:00
"refId" : "A" ,
"legendFormat" : "{{namespace}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent" ,
"color" : {
"mode" : "palette-classic"
}
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "list" ,
"placement" : "right"
} ,
"pieType" : "pie" ,
2025-12-12 20:40:32 -03:00
"displayLabels" : [ ] ,
2025-12-02 13:16:00 -03:00
"tooltip" : {
"mode" : "single"
} ,
"colorScheme" : "interpolateSpectral" ,
"colorBy" : "value" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
}
}
} ,
{
"id" : 2 ,
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "GPU Util by Namespace" ,
2025-12-02 13:16:00 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 12 ,
"y" : 0
} ,
"targets" : [
{
2026-01-01 14:16:08 -03:00
"expr" : "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)" ,
2025-12-02 13:16:00 -03:00
"refId" : "A" ,
"legendFormat" : "{{namespace}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent"
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "table" ,
"placement" : "right"
} ,
"tooltip" : {
"mode" : "multi"
}
}
} ,
{
"id" : 3 ,
"type" : "timeseries" ,
2025-12-02 14:41:39 -03:00
"title" : "GPU Util by Node" ,
2025-12-02 13:16:00 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 0 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "sum by (Hostname) (DCGM_FI_DEV_GPU_UTIL{pod!=\"\"})" ,
"refId" : "A" ,
"legendFormat" : "{{Hostname}}"
}
] ,
"fieldConfig" : {
"defaults" : {
"unit" : "percent"
} ,
"overrides" : [ ]
} ,
"options" : {
"legend" : {
"displayMode" : "table" ,
"placement" : "right"
} ,
"tooltip" : {
"mode" : "multi"
}
}
} ,
{
"id" : 4 ,
"type" : "table" ,
2025-12-02 14:41:39 -03:00
"title" : "Top Pods by GPU Util" ,
2025-12-02 13:16:00 -03:00
"datasource" : {
"type" : "prometheus" ,
"uid" : "atlas-vm"
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 12 ,
"y" : 8
} ,
"targets" : [
{
"expr" : "topk(10, sum(DCGM_FI_DEV_GPU_UTIL{pod!=\"\"}) by (namespace,pod,Hostname))" ,
"refId" : "A"
}
] ,
"fieldConfig" : {
"defaults" : {
2025-12-13 18:23:19 -03:00
"unit" : "percent" ,
"custom" : {
"filterable" : true
}
2025-12-02 13:16:00 -03:00
} ,
"overrides" : [ ]
} ,
"options" : {
2025-12-13 18:23:19 -03:00
"showHeader" : true ,
"columnFilters" : false
2025-12-02 13:16:00 -03:00
} ,
"transformations" : [
{
"id" : "labelsToFields" ,
"options" : { }
}
]
}
] ,
"time" : {
"from" : "now-12h" ,
"to" : "now"
} ,
"annotations" : {
"list" : [ ]
} ,
"schemaVersion" : 39 ,
"style" : "dark" ,
"tags" : [
"atlas" ,
"gpu"
2026-01-01 14:16:08 -03:00
] ,
"templating" : {
"list" : [
{
"name" : "namespace_scope" ,
"label" : "Namespace filter" ,
"type" : "custom" ,
"query" : "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"" ,
"current" : {
"text" : "workload namespaces only" ,
"value" : "namespace!~\"(^kube.*|.*-system$|^traefik$)\"" ,
"selected" : true
} ,
"options" : [
{
"text" : "workload namespaces only" ,
"value" : "namespace!~\"(^kube.*|.*-system$|^traefik$)\"" ,
"selected" : true
} ,
{
"text" : "all namespaces" ,
"value" : "namespace=~\".*\"" ,
"selected" : false
} ,
{
"text" : "infrastructure namespaces only" ,
"value" : "namespace=~\"(^kube.*|.*-system$|^traefik$)\"" ,
"selected" : false
}
] ,
"hide" : 0 ,
"multi" : false ,
"includeAll" : false ,
"refresh" : 1 ,
"sort" : 0 ,
"skipUrlSync" : false
}
]
}
2025-12-02 13:16:00 -03:00
}