monitoring: per-panel namespace share filters
This commit is contained in:
parent
7c31d25c24
commit
5093f77c0a
@ -9,6 +9,7 @@ Usage:
|
||||
import argparse
|
||||
import json
|
||||
import textwrap
|
||||
import urllib.parse
|
||||
from pathlib import Path
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -80,7 +81,7 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
|
||||
WORKER_TOTAL = len(WORKER_NODES)
|
||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system"
|
||||
CP_ALLOWED_NS = "(^kube.*|.*-system$|^traefik$|^monitoring$)"
|
||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
|
||||
CONTROL_WORKLOADS_EXPR = (
|
||||
@ -170,22 +171,43 @@ def node_io_expr(scope=""):
|
||||
return scoped_node_expr(base, scope)
|
||||
|
||||
|
||||
def namespace_selector(scope_var):
    """Return label matchers for container-level series, ending with *scope_var*.

    The fixed matchers require non-empty ``namespace``, ``pod`` and
    ``container`` labels; *scope_var* is appended verbatim as the final
    comma-separated matcher.
    """
    required_labels = 'namespace!="",pod!="",container!=""'
    return f"{required_labels},{scope_var}"
|
||||
|
||||
|
||||
def namespace_gpu_selector(scope_var):
    """Return label matchers for pod-level GPU series, ending with *scope_var*.

    Unlike ``namespace_selector`` there is no ``container`` matcher: only
    non-empty ``namespace`` and ``pod`` labels are required before the
    verbatim *scope_var* matcher.
    """
    required_labels = 'namespace!="",pod!=""'
    return f"{required_labels},{scope_var}"
|
||||
|
||||
|
||||
def namespace_cpu_raw(scope_var):
    """Return the per-namespace CPU usage expression for the given scope.

    The expression sums the 5-minute rate of
    ``container_cpu_usage_seconds_total`` by namespace, filtered with
    ``namespace_selector(scope_var)``.
    """
    selector = namespace_selector(scope_var)
    cpu_rate = f"rate(container_cpu_usage_seconds_total{{{selector}}}[5m])"
    return f"sum({cpu_rate}) by (namespace)"
|
||||
|
||||
|
||||
def namespace_ram_raw(scope_var):
    """Return the per-namespace working-set memory expression for the given scope.

    The expression sums ``container_memory_working_set_bytes`` by namespace,
    filtered with ``namespace_selector(scope_var)``.
    """
    selector = namespace_selector(scope_var)
    series = f"container_memory_working_set_bytes{{{selector}}}"
    return f"sum({series}) by (namespace)"
|
||||
|
||||
|
||||
def namespace_gpu_usage_instant(scope_var):
    """Return the per-namespace GPU utilisation expression for the given scope.

    The expression sums ``DCGM_FI_DEV_GPU_UTIL`` by namespace, filtered with
    ``namespace_gpu_selector(scope_var)``.
    """
    selector = namespace_gpu_selector(scope_var)
    series = f"DCGM_FI_DEV_GPU_UTIL{{{selector}}}"
    return f"sum({series}) by (namespace)"
|
||||
|
||||
|
||||
def namespace_share_expr(resource_expr):
    """Return an expression giving each series of *resource_expr* as a percentage.

    The denominator is the sum over *resource_expr*, clamped to a minimum of 1
    so the division never has a zero divisor.
    """
    numerator = f"( {resource_expr} )"
    denominator = f"clamp_min(sum( {resource_expr} ), 1)"
    return f"100 * {numerator} / {denominator}"
|
||||
|
||||
|
||||
def namespace_cpu_share_expr(scope_var):
    """Return the per-namespace CPU share (percent) expression for *scope_var*.

    Wraps ``namespace_cpu_raw(scope_var)`` in ``namespace_share_expr`` so each
    panel can pass its own namespace-scope matcher.
    """
    # Single definition: the earlier zero-argument variant was shadowed by this
    # one and depended on the module-level NAMESPACE_CPU_RAW constant.
    return namespace_share_expr(namespace_cpu_raw(scope_var))
|
||||
|
||||
|
||||
def namespace_ram_share_expr(scope_var):
    """Return the per-namespace RAM share (percent) expression for *scope_var*.

    Wraps ``namespace_ram_raw(scope_var)`` in ``namespace_share_expr`` so each
    panel can pass its own namespace-scope matcher.
    """
    # Single definition: the earlier zero-argument variant was shadowed by this
    # one and depended on the module-level NAMESPACE_RAM_RAW constant.
    return namespace_share_expr(namespace_ram_raw(scope_var))
|
||||
|
||||
|
||||
def namespace_gpu_share_expr(scope_var):
    """Return the per-namespace GPU share (percent) expression for *scope_var*.

    The result is the union of two branches:

    * each namespace's share of the summed GPU utilisation, with the
      denominator clamped to at least 1;
    * a synthetic ``namespace="idle"`` series of value 100 that is kept only
      when the summed utilisation equals 0.
    """
    # Single definition: the earlier zero-argument variant was shadowed by this
    # one and depended on the module-level NAMESPACE_GPU_USAGE_INSTANT constant.
    usage = namespace_gpu_usage_instant(scope_var)
    # "or on() vector(0)" yields 0 when the selector matches no series at all,
    # so the "== 0" test below still has a sample to compare.
    total = f"(sum({usage}) or on() vector(0))"
    share = f"100 * ({usage}) / clamp_min({total}, 1)"
    idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)"
    return f"({share}) or ({idle})"
|
||||
|
||||
@ -272,20 +294,12 @@ STUCK_TABLE_EXPR = (
|
||||
")"
|
||||
)
|
||||
|
||||
NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$)"'
|
||||
NAMESPACE_SCOPE_WORKLOAD = 'namespace!~"(^kube.*|.*-system$|^traefik$|^monitoring$)"'
|
||||
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
|
||||
NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$)"'
|
||||
NAMESPACE_SCOPE_VAR = "$namespace_scope"
|
||||
NAMESPACE_SELECTOR = f'namespace!="",pod!="",container!="",{NAMESPACE_SCOPE_VAR}'
|
||||
NAMESPACE_GPU_SELECTOR = f'namespace!="",pod!="",{NAMESPACE_SCOPE_VAR}'
|
||||
|
||||
NAMESPACE_CPU_RAW = (
|
||||
f'sum(rate(container_cpu_usage_seconds_total{{{NAMESPACE_SELECTOR}}}[5m])) by (namespace)'
|
||||
)
|
||||
NAMESPACE_RAM_RAW = f'sum(container_memory_working_set_bytes{{{NAMESPACE_SELECTOR}}}) by (namespace)'
|
||||
NAMESPACE_SCOPE_INFRA = 'namespace=~"(^kube.*|.*-system$|^traefik$|^monitoring$)"'
|
||||
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
|
||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||
NAMESPACE_GPU_USAGE_INSTANT = f'sum(DCGM_FI_DEV_GPU_UTIL{{{NAMESPACE_GPU_SELECTOR}}}) by (namespace)'
|
||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||
TRAEFIK_NET_INGRESS = (
|
||||
'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
|
||||
@ -536,9 +550,9 @@ def table_panel(
|
||||
return panel
|
||||
|
||||
|
||||
def pie_panel(panel_id, title, expr, grid):
|
||||
def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
|
||||
"""Return a pie chart panel with readable namespace labels."""
|
||||
return {
|
||||
panel = {
|
||||
"id": panel_id,
|
||||
"type": "piechart",
|
||||
"title": title,
|
||||
@ -562,9 +576,14 @@ def pie_panel(panel_id, title, expr, grid):
|
||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||
},
|
||||
}
|
||||
if links:
|
||||
panel["links"] = links
|
||||
if description:
|
||||
panel["description"] = description
|
||||
return panel
|
||||
|
||||
|
||||
def namespace_scope_variable():
|
||||
def namespace_scope_variable(var_name, label):
|
||||
options = [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
@ -587,13 +606,13 @@ def namespace_scope_variable():
|
||||
+ NAMESPACE_SCOPE_INFRA
|
||||
)
|
||||
return {
|
||||
"name": "namespace_scope",
|
||||
"label": "Namespace filter",
|
||||
"name": var_name,
|
||||
"label": label,
|
||||
"type": "custom",
|
||||
"query": query,
|
||||
"current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True},
|
||||
"options": options,
|
||||
"hide": 0,
|
||||
"hide": 2,
|
||||
"multi": False,
|
||||
"includeAll": False,
|
||||
"refresh": 1,
|
||||
@ -602,6 +621,28 @@ def namespace_scope_variable():
|
||||
}
|
||||
|
||||
|
||||
def namespace_scope_links(var_name):
    """Return the three panel links that switch the *var_name* scope variable.

    Each link is a relative ``?var-...`` query string carrying one parameter
    per entry of ``NAMESPACE_SCOPE_VARS``. The parameter for *var_name* is set
    to the fully percent-encoded scope matcher; every other parameter is
    emitted as a literal ``${name}`` placeholder (presumably expanded by
    Grafana to that variable's current value when the link is followed).
    """

    def with_value(value):
        # safe="" makes quote() escape every reserved character, "/" included.
        encoded = urllib.parse.quote(value, safe="")
        params = [
            f"var-{other}={encoded}" if other == var_name else f"var-{other}=${{{other}}}"
            for other in NAMESPACE_SCOPE_VARS
        ]
        return "?" + "&".join(params)

    scope_choices = (
        ("Workload namespaces only", NAMESPACE_SCOPE_WORKLOAD),
        ("All namespaces", NAMESPACE_SCOPE_ALL),
        ("Infrastructure namespaces only", NAMESPACE_SCOPE_INFRA),
    )
    return [
        {"title": title, "url": with_value(scope), "targetBlank": False}
        for title, scope in scope_choices
    ]
|
||||
|
||||
|
||||
def bargauge_panel(
|
||||
panel_id,
|
||||
title,
|
||||
@ -890,28 +931,38 @@ def build_overview():
|
||||
)
|
||||
)
|
||||
|
||||
cpu_scope = "$namespace_scope_cpu"
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
ram_scope = "$namespace_scope_ram"
|
||||
|
||||
panels.append(
|
||||
pie_panel(
|
||||
11,
|
||||
"Namespace CPU Share",
|
||||
namespace_cpu_share_expr(),
|
||||
namespace_cpu_share_expr(cpu_scope),
|
||||
{"h": 9, "w": 8, "x": 0, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_cpu"),
|
||||
description="Use panel links to switch namespace scope.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
pie_panel(
|
||||
12,
|
||||
"Namespace GPU Share",
|
||||
namespace_gpu_share_expr(),
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 9, "w": 8, "x": 8, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Use panel links to switch namespace scope.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
pie_panel(
|
||||
13,
|
||||
"Namespace RAM Share",
|
||||
namespace_ram_share_expr(),
|
||||
namespace_ram_share_expr(ram_scope),
|
||||
{"h": 9, "w": 8, "x": 16, "y": 16},
|
||||
links=namespace_scope_links("namespace_scope_ram"),
|
||||
description="Use panel links to switch namespace scope.",
|
||||
)
|
||||
)
|
||||
|
||||
@ -1077,7 +1128,13 @@ def build_overview():
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["atlas", "overview"],
|
||||
"templating": {"list": [namespace_scope_variable()]},
|
||||
"templating": {
|
||||
"list": [
|
||||
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
||||
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
||||
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
||||
]
|
||||
},
|
||||
"time": {"from": "now-1h", "to": "now"},
|
||||
"refresh": "1m",
|
||||
"links": [],
|
||||
@ -1718,19 +1775,22 @@ def build_network_dashboard():
|
||||
|
||||
def build_gpu_dashboard():
|
||||
panels = []
|
||||
gpu_scope = "$namespace_scope_gpu"
|
||||
panels.append(
|
||||
pie_panel(
|
||||
1,
|
||||
"Namespace GPU Share",
|
||||
namespace_gpu_share_expr(),
|
||||
namespace_gpu_share_expr(gpu_scope),
|
||||
{"h": 8, "w": 12, "x": 0, "y": 0},
|
||||
links=namespace_scope_links("namespace_scope_gpu"),
|
||||
description="Use panel links to switch namespace scope.",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
2,
|
||||
"GPU Util by Namespace",
|
||||
NAMESPACE_GPU_USAGE_INSTANT,
|
||||
namespace_gpu_usage_instant(gpu_scope),
|
||||
{"h": 8, "w": 12, "x": 12, "y": 0},
|
||||
unit="percent",
|
||||
legend="{{namespace}}",
|
||||
@ -1771,7 +1831,13 @@ def build_gpu_dashboard():
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["atlas", "gpu"],
|
||||
"templating": {"list": [namespace_scope_variable()]},
|
||||
"templating": {
|
||||
"list": [
|
||||
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
||||
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
||||
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -7,6 +7,8 @@ metadata:
|
||||
spec:
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 2
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: ollama
|
||||
|
||||
@ -200,24 +200,3 @@ spec:
|
||||
port:
|
||||
number: 80
|
||||
pathType: Prefix
|
||||
---
|
||||
# Source: element-web/templates/tests/test-connection.yaml
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: "othrys-element-element-web-test-connection"
|
||||
labels:
|
||||
helm.sh/chart: element-web-1.4.26
|
||||
app.kubernetes.io/name: element-web
|
||||
app.kubernetes.io/instance: othrys-element
|
||||
app.kubernetes.io/version: "1.12.6"
|
||||
app.kubernetes.io/managed-by: Helm
|
||||
annotations:
|
||||
"helm.sh/hook": test-success
|
||||
spec:
|
||||
containers:
|
||||
- name: wget
|
||||
image: busybox
|
||||
command: ['wget']
|
||||
args: ['othrys-element-element-web:80']
|
||||
restartPolicy: Never
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -53,7 +53,25 @@
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
@ -71,7 +89,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)",
|
||||
"expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -186,19 +204,19 @@
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "namespace_scope",
|
||||
"label": "Namespace filter",
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -208,11 +226,79 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 0,
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
|
||||
@ -142,7 +142,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -76,7 +76,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1086,7 +1086,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)",
|
||||
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1119,7 +1119,25 @@
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
@ -1137,7 +1155,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1170,7 +1188,25 @@
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -1188,7 +1224,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)",
|
||||
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1221,7 +1257,25 @@
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
@ -1793,19 +1847,19 @@
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "namespace_scope",
|
||||
"label": "Namespace filter",
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -1815,11 +1869,79 @@
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 0,
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
|
||||
@ -200,7 +200,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -29,7 +29,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -62,7 +62,25 @@ data:
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
@ -80,7 +98,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)",
|
||||
"expr": "sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -195,19 +213,19 @@ data:
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "namespace_scope",
|
||||
"label": "Namespace filter",
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -217,11 +235,79 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 0,
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
|
||||
@ -151,7 +151,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
@ -85,7 +85,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"}) or on() vector(0)",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"}) or on() vector(0)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
@ -1095,7 +1095,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}[5m])) by (namespace) ), 1)",
|
||||
"expr": "100 * ( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ) / clamp_min(sum( sum(rate(container_cpu_usage_seconds_total{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_cpu}[5m])) by (namespace) ), 1)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1128,7 +1128,25 @@ data:
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22.%2A%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
@ -1146,7 +1164,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"expr": "(100 * (sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) / clamp_min((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)), 1)) or (label_replace(vector(100), \"namespace\", \"idle\", \"\", \"\") and on() ((sum(sum(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\",$namespace_scope_gpu}) by (namespace)) or on() vector(0)) == 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1179,7 +1197,25 @@ data:
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22.%2A%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22&var-namespace_scope_ram=${namespace_scope_ram}",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
@ -1197,7 +1233,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope}) by (namespace) ), 1)",
|
||||
"expr": "100 * ( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ) / clamp_min(sum( sum(container_memory_working_set_bytes{namespace!=\"\",pod!=\"\",container!=\"\",$namespace_scope_ram}) by (namespace) ), 1)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{namespace}}"
|
||||
}
|
||||
@ -1230,7 +1266,25 @@ data:
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Workload namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "All namespaces",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22.%2A%22",
|
||||
"targetBlank": false
|
||||
},
|
||||
{
|
||||
"title": "Infrastructure namespaces only",
|
||||
"url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%28%5Ekube.%2A%7C.%2A-system%24%7C%5Etraefik%24%7C%5Emonitoring%24%29%22",
|
||||
"targetBlank": false
|
||||
}
|
||||
],
|
||||
"description": "Use panel links to switch namespace scope."
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
@ -1802,19 +1856,19 @@ data:
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "namespace_scope",
|
||||
"label": "Namespace filter",
|
||||
"name": "namespace_scope_cpu",
|
||||
"label": "CPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
@ -1824,11 +1878,79 @@ data:
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$)\"",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 0,
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_gpu",
|
||||
"label": "GPU namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
"sort": 0,
|
||||
"skipUrlSync": false
|
||||
},
|
||||
{
|
||||
"name": "namespace_scope_ram",
|
||||
"label": "RAM namespace filter",
|
||||
"type": "custom",
|
||||
"query": "workload namespaces only : namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"current": {
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
"options": [
|
||||
{
|
||||
"text": "workload namespaces only",
|
||||
"value": "namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": true
|
||||
},
|
||||
{
|
||||
"text": "all namespaces",
|
||||
"value": "namespace=~\".*\"",
|
||||
"selected": false
|
||||
},
|
||||
{
|
||||
"text": "infrastructure namespaces only",
|
||||
"value": "namespace=~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"",
|
||||
"selected": false
|
||||
}
|
||||
],
|
||||
"hide": 2,
|
||||
"multi": false,
|
||||
"includeAll": false,
|
||||
"refresh": 1,
|
||||
|
||||
@ -209,7 +209,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system\"})",
|
||||
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"(^kube.*|.*-system$|^traefik$|^monitoring$)\"})",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user