monitoring: polish dashboards

This commit is contained in:
Brad Stein 2025-11-17 18:55:11 -03:00
parent 8f5781d3cf
commit 349d9c56ac
9 changed files with 104 additions and 93 deletions

View File

@ -80,6 +80,7 @@ WORKER_TOTAL = len(WORKER_NODES)
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}" CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}"
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring" CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# PromQL helpers # PromQL helpers
@ -149,9 +150,10 @@ CRASHLOOP_EXPR = (
'{reason=~"CrashLoopBackOff|ImagePullBackOff"}))' '{reason=~"CrashLoopBackOff|ImagePullBackOff"}))'
) )
STUCK_TERMINATING_EXPR = ( STUCK_TERMINATING_EXPR = (
'sum(max by (namespace,pod) ((' 'sum(max by (namespace,pod) ('
'(time() - kube_pod_deletion_timestamp{pod!=""}) > 600' '((time() - kube_pod_deletion_timestamp{pod!=""}) > bool 600)'
') and on(namespace,pod) kube_pod_deletion_timestamp{pod!=""} > 0))' ' and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=""} > bool 0)'
'))'
) )
PROBLEM_TABLE_EXPR = ( PROBLEM_TABLE_EXPR = (
@ -168,9 +170,11 @@ CRASHLOOP_TABLE_EXPR = (
"(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})" "(kube_pod_container_status_waiting_reason{reason=~\"CrashLoopBackOff|ImagePullBackOff\"})"
) )
STUCK_TABLE_EXPR = ( STUCK_TABLE_EXPR = (
"("
"((time() - kube_pod_deletion_timestamp{pod!=\"\"}) " "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) "
"* on(namespace,pod) group_left(node) kube_pod_info) " "and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) "
"and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0" "* on(namespace,pod) group_left(node) kube_pod_info"
")"
) )
NAMESPACE_CPU_EXPR = ( NAMESPACE_CPU_EXPR = (
@ -192,6 +196,7 @@ IO_SERIES_EXPR = (
"+ rate(node_disk_written_bytes_total[5m]))" "+ rate(node_disk_written_bytes_total[5m]))"
) )
IO_TOP_EXPR = f"topk(1, {IO_SERIES_EXPR})" IO_TOP_EXPR = f"topk(1, {IO_SERIES_EXPR})"
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
NET_INGRESS_EXPR = ( NET_INGRESS_EXPR = (
'sum(rate(container_network_receive_bytes_total{namespace!="" ,pod!=""}[5m])) ' 'sum(rate(container_network_receive_bytes_total{namespace!="" ,pod!=""}[5m])) '
"or on() vector(0)" "or on() vector(0)"
@ -216,6 +221,7 @@ def stat_panel(
thresholds=None, thresholds=None,
text_mode="value", text_mode="value",
legend=None, legend=None,
display_name=None,
value_suffix=None, value_suffix=None,
links=None, links=None,
): ):
@ -236,6 +242,8 @@ def stat_panel(
} }
if value_suffix: if value_suffix:
defaults["custom"]["valueSuffix"] = value_suffix defaults["custom"]["valueSuffix"] = value_suffix
if display_name:
defaults["displayName"] = display_name
panel = { panel = {
"id": panel_id, "id": panel_id,
"type": "stat", "type": "stat",
@ -449,8 +457,8 @@ def build_overview():
hottest = [ hottest = [
(7, "Hottest node: CPU", f"topk(1, {node_cpu_expr()})", "percent"), (7, "Hottest node: CPU", f"topk(1, {node_cpu_expr()})", "percent"),
(8, "Hottest node: RAM", f"topk(1, {node_mem_expr()})", "percent"), (8, "Hottest node: RAM", f"topk(1, {node_mem_expr()})", "percent"),
(9, "Hottest node: NET", NET_TOP_EXPR, "bytes/sec"), (9, "Hottest node: NET", NET_TOP_EXPR, "Bps"),
(10, "Hottest node: I/O", IO_TOP_EXPR, "bytes/sec"), (10, "Hottest node: I/O", IO_TOP_EXPR, "Bps"),
] ]
for idx, (panel_id, title, expr, unit) in enumerate(hottest): for idx, (panel_id, title, expr, unit) in enumerate(hottest):
panels.append( panels.append(
@ -462,7 +470,7 @@ def build_overview():
unit=unit, unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None, thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
text_mode="value_and_name", text_mode="value_and_name",
legend="{{node}}", display_name="{{node}}",
links=link_to("atlas-nodes"), links=link_to("atlas-nodes"),
) )
) )
@ -544,7 +552,7 @@ def build_overview():
"Cluster ingress throughput", "Cluster ingress throughput",
NET_INGRESS_EXPR, NET_INGRESS_EXPR,
{"h": 7, "w": 12, "x": 0, "y": 34}, {"h": 7, "w": 12, "x": 0, "y": 34},
unit="bytes/sec", unit="Bps",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
links=link_to("atlas-network"), links=link_to("atlas-network"),
@ -556,7 +564,7 @@ def build_overview():
"Cluster egress throughput", "Cluster egress throughput",
NET_EGRESS_EXPR, NET_EGRESS_EXPR,
{"h": 7, "w": 12, "x": 12, "y": 34}, {"h": 7, "w": 12, "x": 12, "y": 34},
unit="bytes/sec", unit="Bps",
legend_display="list", legend_display="list",
legend_placement="bottom", legend_placement="bottom",
links=link_to("atlas-network"), links=link_to("atlas-network"),
@ -616,8 +624,8 @@ def build_overview():
storage_panels = [ storage_panels = [
(21, "Astreae usage", astreae_usage_expr("/mnt/astreae"), "percent"), (21, "Astreae usage", astreae_usage_expr("/mnt/astreae"), "percent"),
(22, "Asteria usage", astreae_usage_expr("/mnt/asteria"), "percent"), (22, "Asteria usage", astreae_usage_expr("/mnt/asteria"), "percent"),
(23, "Astreae free", astreae_free_expr("/mnt/astreae"), "bytesSI"), (23, "Astreae free", astreae_free_expr("/mnt/astreae"), "decbytes"),
(24, "Asteria free", astreae_free_expr("/mnt/asteria"), "bytesSI"), (24, "Asteria free", astreae_free_expr("/mnt/asteria"), "decbytes"),
] ]
for idx, (panel_id, title, expr, unit) in enumerate(storage_panels): for idx, (panel_id, title, expr, unit) in enumerate(storage_panels):
panels.append( panels.append(
@ -911,7 +919,7 @@ def build_storage_dashboard():
"Astreae free", "Astreae free",
astreae_free_expr("/mnt/astreae"), astreae_free_expr("/mnt/astreae"),
{"h": 5, "w": 6, "x": 12, "y": 0}, {"h": 5, "w": 6, "x": 12, "y": 0},
unit="bytesSI", unit="decbytes",
) )
) )
panels.append( panels.append(
@ -920,14 +928,14 @@ def build_storage_dashboard():
"Asteria free", "Asteria free",
astreae_free_expr("/mnt/asteria"), astreae_free_expr("/mnt/asteria"),
{"h": 5, "w": 6, "x": 18, "y": 0}, {"h": 5, "w": 6, "x": 18, "y": 0},
unit="bytesSI", unit="decbytes",
) )
) )
panels.append( panels.append(
timeseries_panel( timeseries_panel(
5, 5,
"Astreae per-node usage", "Astreae per-node usage",
filesystem_usage_expr("/mnt/astreae"), filesystem_usage_expr("/mnt/astreae", LONGHORN_NODE_REGEX),
{"h": 9, "w": 12, "x": 0, "y": 5}, {"h": 9, "w": 12, "x": 0, "y": 5},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
@ -940,7 +948,7 @@ def build_storage_dashboard():
timeseries_panel( timeseries_panel(
6, 6,
"Asteria per-node usage", "Asteria per-node usage",
filesystem_usage_expr("/mnt/asteria"), filesystem_usage_expr("/mnt/asteria", LONGHORN_NODE_REGEX),
{"h": 9, "w": 12, "x": 12, "y": 5}, {"h": 9, "w": 12, "x": 12, "y": 5},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
@ -986,18 +994,19 @@ def build_storage_dashboard():
def build_network_dashboard(): def build_network_dashboard():
panels = [] panels = []
panels.append( panels.append(
stat_panel(1, "Ingress bytes/s", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="bytes/sec") stat_panel(1, "Ingress traffic", NET_INGRESS_EXPR, {"h": 4, "w": 8, "x": 0, "y": 0}, unit="Bps")
) )
panels.append( panels.append(
stat_panel(2, "Egress bytes/s", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="bytes/sec") stat_panel(2, "Egress traffic", NET_EGRESS_EXPR, {"h": 4, "w": 8, "x": 8, "y": 0}, unit="Bps")
) )
panels.append( panels.append(
stat_panel( stat_panel(
3, 3,
"Top router req/s", "Top router req/s",
'max(topk(1, rate(traefik_router_requests_total[5m])))', f"topk(1, {TRAEFIK_ROUTER_EXPR})",
{"h": 4, "w": 8, "x": 16, "y": 0}, {"h": 4, "w": 8, "x": 16, "y": 0},
unit="req/s", unit="req/s",
display_name="{{router}}",
) )
) )
panels.append( panels.append(
@ -1006,7 +1015,7 @@ def build_network_dashboard():
"Per-node throughput", "Per-node throughput",
NET_SERIES_EXPR, NET_SERIES_EXPR,
{"h": 8, "w": 24, "x": 0, "y": 4}, {"h": 8, "w": 24, "x": 0, "y": 4},
unit="bytes/sec", unit="Bps",
legend="{{node}}", legend="{{node}}",
legend_display="table", legend_display="table",
legend_placement="right", legend_placement="right",
@ -1019,7 +1028,7 @@ def build_network_dashboard():
'topk(10, sum(rate(container_network_transmit_bytes_total{namespace!=""}[5m]) ' 'topk(10, sum(rate(container_network_transmit_bytes_total{namespace!=""}[5m]) '
'+ rate(container_network_receive_bytes_total{namespace!=""}[5m])) by (namespace))', '+ rate(container_network_receive_bytes_total{namespace!=""}[5m])) by (namespace))',
{"h": 9, "w": 12, "x": 0, "y": 12}, {"h": 9, "w": 12, "x": 0, "y": 12},
unit="bytes/sec", unit="Bps",
transformations=[{"id": "labelsToFields", "options": {}}], transformations=[{"id": "labelsToFields", "options": {}}],
) )
) )
@ -1030,7 +1039,7 @@ def build_network_dashboard():
'topk(10, sum(rate(container_network_transmit_bytes_total{pod!=""}[5m]) ' 'topk(10, sum(rate(container_network_transmit_bytes_total{pod!=""}[5m]) '
'+ rate(container_network_receive_bytes_total{pod!=""}[5m])) by (namespace,pod))', '+ rate(container_network_receive_bytes_total{pod!=""}[5m])) by (namespace,pod))',
{"h": 9, "w": 12, "x": 12, "y": 12}, {"h": 9, "w": 12, "x": 12, "y": 12},
unit="bytes/sec", unit="Bps",
transformations=[{"id": "labelsToFields", "options": {}}], transformations=[{"id": "labelsToFields", "options": {}}],
) )
) )
@ -1038,7 +1047,7 @@ def build_network_dashboard():
timeseries_panel( timeseries_panel(
7, 7,
"Traefik routers (req/s)", "Traefik routers (req/s)",
'topk(10, rate(traefik_router_requests_total[5m]))', f"topk(10, {TRAEFIK_ROUTER_EXPR})",
{"h": 9, "w": 12, "x": 0, "y": 21}, {"h": 9, "w": 12, "x": 0, "y": 21},
unit="req/s", unit="req/s",
legend="{{router}}", legend="{{router}}",

View File

@ -7,7 +7,7 @@
{ {
"id": 1, "id": 1,
"type": "stat", "type": "stat",
"title": "Ingress bytes/s", "title": "Ingress traffic",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -43,7 +43,7 @@
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -67,7 +67,7 @@
{ {
"id": 2, "id": 2,
"type": "stat", "type": "stat",
"title": "Egress bytes/s", "title": "Egress traffic",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -103,7 +103,7 @@
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -140,7 +140,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))", "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A" "refId": "A"
} }
], ],
@ -166,7 +166,8 @@
"unit": "req/s", "unit": "req/s",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{router}}"
}, },
"overrides": [] "overrides": []
}, },
@ -207,7 +208,7 @@
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -243,7 +244,7 @@
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -279,7 +280,7 @@
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -309,7 +310,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, rate(traefik_router_requests_total[5m]))", "expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A", "refId": "A",
"legendFormat": "{{router}}" "legendFormat": "{{router}}"
} }

View File

@ -339,7 +339,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))", "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A" "refId": "A"
} }
], ],
@ -407,8 +407,7 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -437,7 +436,8 @@
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -479,8 +479,7 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -509,7 +508,8 @@
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -551,8 +551,7 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -574,10 +573,11 @@
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -619,8 +619,7 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -642,10 +641,11 @@
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -944,7 +944,7 @@
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -987,7 +987,7 @@
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -1306,7 +1306,7 @@
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -1373,7 +1373,7 @@
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }

View File

@ -140,7 +140,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))", "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A" "refId": "A"
} }
], ],
@ -332,7 +332,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0", "expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
"refId": "A" "refId": "A"
} }
], ],

View File

@ -171,7 +171,7 @@
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -231,7 +231,7 @@
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -268,7 +268,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))", "expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A", "refId": "A",
"legendFormat": "{{node}}" "legendFormat": "{{node}}"
} }
@ -306,7 +306,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))", "expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A", "refId": "A",
"legendFormat": "{{node}}" "legendFormat": "{{node}}"
} }

View File

@ -16,7 +16,7 @@ data:
{ {
"id": 1, "id": 1,
"type": "stat", "type": "stat",
"title": "Ingress bytes/s", "title": "Ingress traffic",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -52,7 +52,7 @@ data:
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -76,7 +76,7 @@ data:
{ {
"id": 2, "id": 2,
"type": "stat", "type": "stat",
"title": "Egress bytes/s", "title": "Egress traffic",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -112,7 +112,7 @@ data:
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -149,7 +149,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "max(topk(1, rate(traefik_router_requests_total[5m])))", "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A" "refId": "A"
} }
], ],
@ -175,7 +175,8 @@ data:
"unit": "req/s", "unit": "req/s",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{router}}"
}, },
"overrides": [] "overrides": []
}, },
@ -216,7 +217,7 @@ data:
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -252,7 +253,7 @@ data:
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -288,7 +289,7 @@ data:
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -318,7 +319,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "topk(10, rate(traefik_router_requests_total[5m]))", "expr": "topk(10, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A", "refId": "A",
"legendFormat": "{{router}}" "legendFormat": "{{router}}"
} }

View File

@ -348,7 +348,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))", "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A" "refId": "A"
} }
], ],
@ -416,8 +416,7 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -446,7 +445,8 @@ data:
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -488,8 +488,7 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -518,7 +517,8 @@ data:
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -560,8 +560,7 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -583,10 +582,11 @@ data:
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -628,8 +628,7 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A", "refId": "A"
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -651,10 +650,11 @@ data:
} }
] ]
}, },
"unit": "bytes/sec", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} },
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -953,7 +953,7 @@ data:
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -996,7 +996,7 @@ data:
], ],
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
"unit": "bytes/sec" "unit": "Bps"
}, },
"overrides": [] "overrides": []
}, },
@ -1315,7 +1315,7 @@ data:
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -1382,7 +1382,7 @@ data:
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }

View File

@ -149,7 +149,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > 600) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0))", "expr": "sum(max by (namespace,pod) (((time() - kube_pod_deletion_timestamp{pod!=\"\"}) > bool 600) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)))",
"refId": "A" "refId": "A"
} }
], ],
@ -341,7 +341,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "((time() - kube_pod_deletion_timestamp{pod!=\"\"}) * on(namespace,pod) group_left(node) kube_pod_info) and on(namespace,pod) kube_pod_deletion_timestamp{pod!=\"\"} > 0", "expr": "(((time() - kube_pod_deletion_timestamp{pod!=\"\"}) and on(namespace,pod) (kube_pod_deletion_timestamp{pod!=\"\"} > bool 0)) * on(namespace,pod) group_left(node) kube_pod_info)",
"refId": "A" "refId": "A"
} }
], ],

View File

@ -180,7 +180,7 @@ data:
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -240,7 +240,7 @@ data:
} }
] ]
}, },
"unit": "bytesSI", "unit": "decbytes",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
} }
@ -277,7 +277,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))", "expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astreae\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A", "refId": "A",
"legendFormat": "{{node}}" "legendFormat": "{{node}}"
} }
@ -315,7 +315,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))", "expr": "(avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/asteria\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))) * on(node) group_left() label_replace(node_uname_info{nodename=~\"titan-1[2-9]|titan-2[24]\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")",
"refId": "A", "refId": "A",
"legendFormat": "{{node}}" "legendFormat": "{{node}}"
} }