monitoring: adjust overview spacing and net panels

This commit is contained in:
Brad Stein 2025-11-18 15:55:24 -03:00
parent 7b2a69cfe3
commit c7b7bc7a6d
5 changed files with 56 additions and 44 deletions

View File

@ -81,6 +81,7 @@ CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}"
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring" CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]" LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
GAUGE_WIDTHS = [5, 5, 5, 5, 4]
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# PromQL helpers # PromQL helpers
@ -262,13 +263,18 @@ TRAEFIK_NET_EGRESS = (
'sum(rate(container_network_transmit_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))' 'sum(rate(container_network_transmit_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
" or on() vector(0)" " or on() vector(0)"
) )
NET_TOTAL_EXPR = ( NET_CLUSTER_RX = (
'sum(rate(container_network_receive_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
" or on() vector(0)"
)
NET_CLUSTER_TX = (
'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))' 'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
" or on() vector(0)" " or on() vector(0)"
) )
NET_TOTAL_EXPR = NET_CLUSTER_TX
NET_INGRESS_EXPR = TRAEFIK_NET_INGRESS NET_INGRESS_EXPR = TRAEFIK_NET_INGRESS
NET_EGRESS_EXPR = TRAEFIK_NET_EGRESS NET_EGRESS_EXPR = TRAEFIK_NET_EGRESS
NET_INTERNAL_EXPR = f"clamp_min(({NET_TOTAL_EXPR}) - ({TRAEFIK_NET_EGRESS}), 0)" NET_INTERNAL_EXPR = f"clamp_min((({NET_CLUSTER_RX}) + ({NET_CLUSTER_TX})) - (({TRAEFIK_NET_INGRESS}) + ({TRAEFIK_NET_EGRESS})), 0)"
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Panel factories # Panel factories
@ -534,6 +540,11 @@ def build_overview():
link_to("atlas-pods"), link_to("atlas-pods"),
), ),
] ]
def gauge_grid(idx, widths=None, fallback_width=4):
    """Return the ``(width, x)`` grid placement for the gauge at position *idx*.

    Gauges are laid out left to right on one row: each gauge starts where
    the previous one ended.

    Args:
        idx: Zero-based position of the gauge in the row.
        widths: Per-position widths; defaults to the module-level
            ``GAUGE_WIDTHS`` table.
        fallback_width: Width used for positions beyond the end of *widths*.

    Returns:
        A ``(width, x)`` tuple for the panel's ``gridPos``.
    """
    if widths is None:
        widths = GAUGE_WIDTHS
    known = len(widths)
    if idx < known:
        return widths[idx], sum(widths[:idx])
    # Past the end of the table, keep advancing x by the fallback width so
    # extra gauges do not all collapse onto the same column offset.
    # NOTE(review): the default table already sums to 24, which looks like the
    # full row width -- confirm whether overflow gauges should wrap instead.
    return fallback_width, sum(widths) + fallback_width * (idx - known)
for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats): for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats):
thresholds = None thresholds = None
min_value = 0 min_value = 0
@ -577,12 +588,13 @@ def build_overview():
{"color": "red", "value": max_value}, {"color": "red", "value": max_value},
], ],
} }
width, x = gauge_grid(idx)
panels.append( panels.append(
gauge_panel( gauge_panel(
panel_id, panel_id,
title, title,
expr, expr,
{"h": 5, "w": 4, "x": 4 * idx, "y": 0}, {"h": 5, "w": width, "x": x, "y": 0},
min_value=min_value, min_value=min_value,
max_value=max_value, max_value=max_value,
thresholds=thresholds, thresholds=thresholds,
@ -662,7 +674,7 @@ def build_overview():
14, 14,
"Worker node CPU", "Worker node CPU",
node_cpu_expr(worker_filter), node_cpu_expr(worker_filter),
{"h": 8, "w": 12, "x": 0, "y": 25}, {"h": 8, "w": 12, "x": 0, "y": 32},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
legend_calcs=["last"], legend_calcs=["last"],
@ -676,7 +688,7 @@ def build_overview():
15, 15,
"Worker node RAM", "Worker node RAM",
node_mem_expr(worker_filter), node_mem_expr(worker_filter),
{"h": 8, "w": 12, "x": 12, "y": 25}, {"h": 8, "w": 12, "x": 12, "y": 32},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
legend_calcs=["last"], legend_calcs=["last"],
@ -691,7 +703,7 @@ def build_overview():
16, 16,
"Control plane CPU", "Control plane CPU",
node_cpu_expr(CONTROL_REGEX), node_cpu_expr(CONTROL_REGEX),
{"h": 7, "w": 12, "x": 0, "y": 33}, {"h": 7, "w": 12, "x": 0, "y": 40},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
legend_display="table", legend_display="table",
@ -703,7 +715,7 @@ def build_overview():
17, 17,
"Control plane RAM", "Control plane RAM",
node_mem_expr(CONTROL_REGEX), node_mem_expr(CONTROL_REGEX),
{"h": 7, "w": 12, "x": 12, "y": 33}, {"h": 7, "w": 12, "x": 12, "y": 40},
unit="percent", unit="percent",
legend="{{node}}", legend="{{node}}",
legend_display="table", legend_display="table",
@ -716,7 +728,7 @@ def build_overview():
18, 18,
"Cluster ingress throughput", "Cluster ingress throughput",
NET_INGRESS_EXPR, NET_INGRESS_EXPR,
{"h": 7, "w": 8, "x": 0, "y": 40}, {"h": 7, "w": 8, "x": 0, "y": 25},
unit="Bps", unit="Bps",
legend="Ingress (Traefik)", legend="Ingress (Traefik)",
legend_display="list", legend_display="list",
@ -729,7 +741,7 @@ def build_overview():
19, 19,
"Cluster egress throughput", "Cluster egress throughput",
NET_EGRESS_EXPR, NET_EGRESS_EXPR,
{"h": 7, "w": 8, "x": 8, "y": 40}, {"h": 7, "w": 8, "x": 8, "y": 25},
unit="Bps", unit="Bps",
legend="Egress (Traefik)", legend="Egress (Traefik)",
legend_display="list", legend_display="list",
@ -742,7 +754,7 @@ def build_overview():
20, 20,
"Intra-cluster throughput", "Intra-cluster throughput",
NET_INTERNAL_EXPR, NET_INTERNAL_EXPR,
{"h": 7, "w": 8, "x": 16, "y": 40}, {"h": 7, "w": 8, "x": 16, "y": 25},
unit="Bps", unit="Bps",
legend="Internal traffic", legend="Internal traffic",
legend_display="list", legend_display="list",

View File

@ -140,7 +140,7 @@
}, },
"targets": [ "targets": [
{ {
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)", "expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A" "refId": "A"
} }
], ],

View File

@ -17,7 +17,7 @@
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 0, "x": 0,
"y": 0 "y": 0
}, },
@ -78,8 +78,8 @@
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 4, "x": 5,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -131,8 +131,8 @@
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 8, "x": 10,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -199,8 +199,8 @@
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 12, "x": 15,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -268,7 +268,7 @@
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 4,
"x": 16, "x": 20,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -1056,7 +1056,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 25 "y": 32
}, },
"targets": [ "targets": [
{ {
@ -1103,7 +1103,7 @@
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 25 "y": 32
}, },
"targets": [ "targets": [
{ {
@ -1150,7 +1150,7 @@
"h": 7, "h": 7,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 33 "y": 40
}, },
"targets": [ "targets": [
{ {
@ -1187,7 +1187,7 @@
"h": 7, "h": 7,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 33 "y": 40
}, },
"targets": [ "targets": [
{ {
@ -1224,7 +1224,7 @@
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 0, "x": 0,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
@ -1268,7 +1268,7 @@
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 8, "x": 8,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
@ -1312,11 +1312,11 @@
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 16, "x": 16,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)", "expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A", "refId": "A",
"legendFormat": "Internal traffic" "legendFormat": "Internal traffic"
} }

View File

@ -149,7 +149,7 @@ data:
}, },
"targets": [ "targets": [
{ {
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)", "expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A" "refId": "A"
} }
], ],

View File

@ -26,7 +26,7 @@ data:
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 0, "x": 0,
"y": 0 "y": 0
}, },
@ -87,8 +87,8 @@ data:
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 4, "x": 5,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -140,8 +140,8 @@ data:
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 8, "x": 10,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -208,8 +208,8 @@ data:
}, },
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 5,
"x": 12, "x": 15,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -277,7 +277,7 @@ data:
"gridPos": { "gridPos": {
"h": 5, "h": 5,
"w": 4, "w": 4,
"x": 16, "x": 20,
"y": 0 "y": 0
}, },
"targets": [ "targets": [
@ -1065,7 +1065,7 @@ data:
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 25 "y": 32
}, },
"targets": [ "targets": [
{ {
@ -1112,7 +1112,7 @@ data:
"h": 8, "h": 8,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 25 "y": 32
}, },
"targets": [ "targets": [
{ {
@ -1159,7 +1159,7 @@ data:
"h": 7, "h": 7,
"w": 12, "w": 12,
"x": 0, "x": 0,
"y": 33 "y": 40
}, },
"targets": [ "targets": [
{ {
@ -1196,7 +1196,7 @@ data:
"h": 7, "h": 7,
"w": 12, "w": 12,
"x": 12, "x": 12,
"y": 33 "y": 40
}, },
"targets": [ "targets": [
{ {
@ -1233,7 +1233,7 @@ data:
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 0, "x": 0,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
@ -1277,7 +1277,7 @@ data:
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 8, "x": 8,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
@ -1321,11 +1321,11 @@ data:
"h": 7, "h": 7,
"w": 8, "w": 8,
"x": 16, "x": 16,
"y": 40 "y": 25
}, },
"targets": [ "targets": [
{ {
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)", "expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A", "refId": "A",
"legendFormat": "Internal traffic" "legendFormat": "Internal traffic"
} }