monitoring: polish dashboards

This commit is contained in:
Brad Stein 2025-11-17 18:55:11 -03:00
parent 8f5781d3cf
commit 349d9c56ac
9 changed files with 104 additions and 93 deletions

View File

@ -80,6 +80,7 @@ WORKER_TOTAL = len(WORKER_NODES)
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
# ---------------------------------------------------------------------------
# PromQL helpers
@ -149,9 +150,10 @@ CRASHLOOP_EXPR = (
'{reason=~"CrashLoopBackOff|ImagePullBackOff"}))'
)
STUCK_TERMINATING_EXPR = (
'sum(max by (namespace,pod) (('
'(time() - kube_pod_deletion_timestamp{pod!=""}) > 600'
') and on(namespace,pod) kube_pod_deletion_timestamp{pod!=""} > 0))'
'sum(max by (namespace,pod) ('
'((time() - kube_pod_deletion_timestamp{pod!=""}) > bool 600)'
' and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=""} > bool 0)'
'))'
)
PROBLEM_TABLE_EXPR = (
@ -168,9 +170,11 @@ CRASHLOOP_TABLE_EXPR = (
"(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})"
)
STUCK_TABLE_EXPR = (
"("
"((time() - kube_pod_deletion_timestamp{pod!=\"\"}) "
"* on(namespace,pod) group_left(node) kube_pod_info) "
"and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0"
"and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) "
"* on(namespace,pod) group_left(node) kube_pod_info"
")"
)
NAMESPACE_CPU_EXPR = (
@ -192,6 +196,7 @@ IO_SERIES_EXPR = (
"+ rate(node_disk_written_bytes_total[5m]))"
)
IO_TOP_EXPR = f"topk(1, {IO_SERIES_EXPR})"
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
NET_INGRESS_EXPR = (
'sum(rate(container_network_receive_bytes_total{namespace!="" ,pod!=""}[5m])) '
"or on() vector(0)"
@ -216,6 +221,7 @@ def stat_panel(
thresholds=None,
text_mode="value",
legend=None,
display_name=None,
value_suffix=None,
links=None,
):
@ -236,6 +242,8 @@ def stat_panel(
}
if value_suffix:
defaults["custom"]["valueSuffix"] = value_suffix
if display_name:
defaults["displayName"] = display_name
panel = {
"id": panel_id,
"type": "stat",
@ -449,8 +457,8 @@ def build_overview():
hottest = [
(7, "Hottest node: CPU", f"topk(1, {node_cpu_expr()})", "percent"),
(8, "Hottest node: RAM", f"topk(1, {node_mem_expr()})", "percent"),
(9, "Hottest node: NET", NET_TOP_EXPR, "bytes/sec"),
(10, "Hottest node: I/O", IO_TOP_EXPR, "bytes/sec"),
(9, "Hottest node: NET", NET_TOP_EXPR, "Bps"),
(10, "Hottest node: I/O", IO_TOP_EXPR, "Bps"),
]
for idx, (panel_id, title, expr, unit) in enumerate(hottest):
panels.append(
@ -462,7 +470,7 @@ def build_overview():
unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
text_mode="value_and_name",
legend="{{node}}",
display_name="{{node}}",
links=link_to("atlas-nodes"),
)
)
@ -544,7 +552,7 @@ def build_overview():
"Cluster ingress throughput",
NET_INGRESS_EXPR,
{"h": 7, "w": 12, "x": 0, "y": 34},
unit="bytes/sec",
unit="Bps",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-network"),
@ -556,7 +564,7 @@ def build_overview():
"Cluster egress throughput",
NET_EGRESS_EXPR,
{"h": 7, "w": 12, "x": 12, "y": 34},
unit="bytes/sec",
unit="Bps",
legend_display="list",
legend_placement="bottom",
links=link_to("atlas-network"),
@ -616,8 +624,8 @@ def build_overview():
storage_panels = [
(21, "Astreae usage", astreae_usage_expr("/mnt/astreae"), "percent"),
(22, "Asteria usage", astreae_usage_expr("/mnt/asteria"), "percent"),
(23, "Astreae free", astreae_free_expr("/mnt/astreae"), "bytesSI"),
(24, "Asteria free", astreae_free_expr("/mnt/asteria"), "bytesSI"),
(23, "Astreae free", astreae_free_expr("/mnt/astreae"), "decbytes"),
(24, "Asteria free", astreae_free_expr("/mnt/asteria"), "decbytes"),
]
for idx, (panel_id, title, expr, unit) in enumerate(storage_panels):
panels.append(
@ -911,7 +919,7 @@ def build_storage_dashboard():
"Astreae free",
astreae_free_expr("/mnt/astreae"),
{"h": 5, "w": 6, "x": 12, "y": 0},
unit="bytesSI",
unit="decbytes",
)
)
panels.append(
@ -920,14 +928,14 @@ def build_storage_dashboard():
"Asteria free",
astreae_free_expr("/mnt/asteria"),
{"h": 5, "w": 6, "x": 18, "y": 0},
unit="bytesSI",
unit="decbytes",
)
)
panels.append(
timeseries_panel(
5,
"Astreae per-node usage",
filesystem_usage_expr("/mnt/astreae"),
filesystem_usage_expr("/mnt/astreae", LONGHORN_NODE_REGEX),
{"h": 9, "w": 12, "x": 0, "y": 5},
unit="percent",
legend="{{node}}",
@ -940,7 +948,7 @@ def build_storage_dashboard():
timeseries_panel(
6,
"Asteria per-node usage",
filesystem_usage_expr("/mnt/asteria"),
filesystem_usage_expr("/mnt/asteria", LONGHORN_NODE_REGEX),
{"h": 9, "w": 12, "x": 12, "y": 5},
unit="percent",
legend="{{node}}",
@ -986,18 +994,19 @@ def build_storage_dashboard():
def build_network_dashboard():
panels = []
panels.append(
stat_panel(1, "Ingress bytes/s", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="bytes/sec")
stat_panel(1, "Ingress traffic", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="Bps")
)
panels.append(
stat_panel(2, "Egress bytes/s", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="bytes/sec")
stat_panel(2, "Egress traffic", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="Bps")
)
panels.append(
stat_panel(
3,
"Top router req/s",
'max(topk(1, rate(traefik_router_requests_total[5m])))',
f"topk(1, {TRAEFIK_ROUTER_EXPR})",
{"h": 4, "w": 8, "x": 16, "y": 0},
unit="req/s",
display_name="{{router}}",
)
)
panels.append(
@ -1006,7 +1015,7 @@ def build_network_dashboard():
"Per-node throughput",
NET_SERIES_EXPR,
{"h": 8, "w": 24, "x": 0, "y": 4},
unit="bytes/sec",
unit="Bps",
legend="{{node}}",
legend_display="table",
legend_placement="right",
@ -1019,7 +1028,7 @@ def build_network_dashboard():
'topk(10, sum(rate(container_network_transmit_bytes_total{namespace!=""}[5m]) '
'+ rate(container_network_receive_bytes_total{namespace!=""}[5m])) by (namespace))',
{"h": 9, "w": 12, "x": 0, "y": 12},
unit="bytes/sec",
unit="Bps",
transformations=[{"id": "labelsToFields", "options": {}}],
)
)
@ -1030,7 +1039,7 @@ def build_network_dashboard():
'topk(10, sum(rate(container_network_transmit_bytes_total{pod!=""}[5m]) '
'+ rate(container_network_receive_bytes_total{pod!=""}[5m])) by (namespace,pod))',
{"h": 9, "w": 12, "x": 12, "y": 12},
unit="bytes/sec",
unit="Bps",
transformations=[{"id": "labelsToFields", "options": {}}],
)
)
@ -1038,7 +1047,7 @@ def build_network_dashboard():
timeseries_panel(
7,
"Traefik routers (req/s)",
'topk(10, rate(traefik_router_requests_total[5m]))',
f"topk(10, {TRAEFIK_ROUTER_EXPR})",
{"h": 9, "w": 12, "x": 0, "y": 21},
unit="req/s",
legend="{{router}}",

View File

@ -7,7 +7,7 @@
{
"id": 1,
"type": "stat",
"title": "Ingress bytes/s",
"title": "Ingress traffic",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -43,7 +43,7 @@
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
@ -67,7 +67,7 @@
{
"id": 2,
"type": "stat",
"title": "Egress bytes/s",
"title": "Egress traffic",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -103,7 +103,7 @@
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
@ -140,7 +140,7 @@
},
"targets": [
{
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))",
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A"
}
],
@ -166,7 +166,8 @@
"unit": "req/s",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{router}}"
},
"overrides": []
},
@ -207,7 +208,7 @@
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -243,7 +244,7 @@
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -279,7 +280,7 @@
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -309,7 +310,7 @@
},
"targets": [
{
"expr": "topk(10, rate(traefik_router_requests_total[5m]))",
"expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A",
"legendFormat": "{{router}}"
}

View File

@ -339,7 +339,7 @@
},
"targets": [
{
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A"
}
],
@ -407,8 +407,7 @@
"targets": [
{
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -437,7 +436,8 @@
"unit": "percent",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -479,8 +479,7 @@
"targets": [
{
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -509,7 +508,8 @@
"unit": "percent",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -551,8 +551,7 @@
"targets": [
{
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -574,10 +573,11 @@
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -619,8 +619,7 @@
"targets": [
{
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -642,10 +641,11 @@
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -944,7 +944,7 @@
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -987,7 +987,7 @@
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -1306,7 +1306,7 @@
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -1373,7 +1373,7 @@
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}

View File

@ -140,7 +140,7 @@
},
"targets": [
{
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A"
}
],
@ -332,7 +332,7 @@
},
"targets": [
{
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0",
"expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
"refId": "A"
}
],

View File

@ -171,7 +171,7 @@
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -231,7 +231,7 @@
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -268,7 +268,7 @@
},
"targets": [
{
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -306,7 +306,7 @@
},
"targets": [
{
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}

View File

@ -16,7 +16,7 @@ data:
{
"id": 1,
"type": "stat",
"title": "Ingress bytes/s",
"title": "Ingress traffic",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -52,7 +52,7 @@ data:
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
@ -76,7 +76,7 @@ data:
{
"id": 2,
"type": "stat",
"title": "Egress bytes/s",
"title": "Egress traffic",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -112,7 +112,7 @@ data:
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
@ -149,7 +149,7 @@ data:
},
"targets": [
{
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))",
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A"
}
],
@ -175,7 +175,8 @@ data:
"unit": "req/s",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{router}}"
},
"overrides": []
},
@ -216,7 +217,7 @@ data:
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -252,7 +253,7 @@ data:
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -288,7 +289,7 @@ data:
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -318,7 +319,7 @@ data:
},
"targets": [
{
"expr": "topk(10, rate(traefik_router_requests_total[5m]))",
"expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A",
"legendFormat": "{{router}}"
}

View File

@ -348,7 +348,7 @@ data:
},
"targets": [
{
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A"
}
],
@ -416,8 +416,7 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -446,7 +445,8 @@ data:
"unit": "percent",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -488,8 +488,7 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -518,7 +517,8 @@ data:
"unit": "percent",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -560,8 +560,7 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -583,10 +582,11 @@ data:
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -628,8 +628,7 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A",
"legendFormat": "{{node}}"
"refId": "A"
}
],
"fieldConfig": {
@ -651,10 +650,11 @@ data:
}
]
},
"unit": "bytes/sec",
"unit": "Bps",
"custom": {
"displayMode": "auto"
}
},
"displayName": "{{node}}"
},
"overrides": []
},
@ -953,7 +953,7 @@ data:
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -996,7 +996,7 @@ data:
],
"fieldConfig": {
"defaults": {
"unit": "bytes/sec"
"unit": "Bps"
},
"overrides": []
},
@ -1315,7 +1315,7 @@ data:
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -1382,7 +1382,7 @@ data:
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}

View File

@ -149,7 +149,7 @@ data:
},
"targets": [
{
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))",
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A"
}
],
@ -341,7 +341,7 @@ data:
},
"targets": [
{
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0",
"expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
"refId": "A"
}
],

View File

@ -180,7 +180,7 @@ data:
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -240,7 +240,7 @@ data:
}
]
},
"unit": "bytesSI",
"unit": "decbytes",
"custom": {
"displayMode": "auto"
}
@ -277,7 +277,7 @@ data:
},
"targets": [
{
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}
@ -315,7 +315,7 @@ data:
},
"targets": [
{
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
"expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A",
"legendFormat": "{{node}}"
}