monitoring: adjust overview spacing and net panels

This commit is contained in:
Brad Stein 2025-11-18 15:55:24 -03:00
parent 7b2a69cfe3
commit c7b7bc7a6d
5 changed files with 56 additions and 44 deletions

View File

@ -81,6 +81,7 @@ CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
# "/<WORKER_TOTAL>" string; mirrors CONTROL_SUFFIX, which is built the same way
# from CONTROL_TOTAL.
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
# Pipe-separated alternation of namespace names.
# NOTE(review): presumably the namespaces allowed to run on control-plane
# nodes -- usage is outside this view, confirm against the PromQL helpers.
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring"
# Regex alternation matching titan-12..titan-19, titan-22 and titan-24.
# NOTE(review): presumably the nodes that host Longhorn -- confirm.
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
# Per-gauge column widths for the overview's top gauge row (y=0), left to
# right. gauge_grid() in build_overview() reads a gauge's width from here and
# places it at the running sum of the widths before it. The five entries
# total 24 columns.
GAUGE_WIDTHS = [5, 5, 5, 5, 4]
# ---------------------------------------------------------------------------
# PromQL helpers
@ -262,13 +263,18 @@ TRAEFIK_NET_EGRESS = (
'sum(rate(container_network_transmit_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
" or on() vector(0)"
)
NET_TOTAL_EXPR = (
NET_CLUSTER_RX = (
'sum(rate(container_network_receive_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
" or on() vector(0)"
)
NET_CLUSTER_TX = (
'sum(rate(container_network_transmit_bytes_total{namespace!="",pod!="",container!=""}[5m]))'
" or on() vector(0)"
)
NET_TOTAL_EXPR = NET_CLUSTER_TX
NET_INGRESS_EXPR = TRAEFIK_NET_INGRESS
NET_EGRESS_EXPR = TRAEFIK_NET_EGRESS
NET_INTERNAL_EXPR = f"clamp_min(({NET_TOTAL_EXPR}) - ({TRAEFIK_NET_EGRESS}), 0)"
NET_INTERNAL_EXPR = f"clamp_min((({NET_CLUSTER_RX}) + ({NET_CLUSTER_TX})) - (({TRAEFIK_NET_INGRESS}) + ({TRAEFIK_NET_EGRESS})), 0)"
# ---------------------------------------------------------------------------
# Panel factories
@ -534,6 +540,11 @@ def build_overview():
link_to("atlas-pods"),
),
]
def gauge_grid(idx, widths=None, default_width=4):
    """Return the ``(width, x)`` grid placement for the gauge at *idx*.

    Args:
        idx: Zero-based position of the gauge within its row.
        widths: Per-position column widths. Defaults to the module-level
            ``GAUGE_WIDTHS`` table when omitted.
        default_width: Width used for any position past the end of
            *widths*.

    Returns:
        A ``(width, x)`` tuple: the gauge's column width and the column
        offset of its left edge, i.e. the total width of every gauge
        placed before it.
    """
    if widths is None:
        widths = GAUGE_WIDTHS
    known = len(widths)
    width = widths[idx] if idx < known else default_width
    # Positions past the end of the table each occupy default_width columns,
    # so the offset has to keep advancing for them; summing only the table
    # would stack every extra gauge on the same x.
    overflow = max(0, idx - known)
    x = sum(widths[:idx]) + default_width * overflow
    return width, x
for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats):
thresholds = None
min_value = 0
@ -577,12 +588,13 @@ def build_overview():
{"color": "red", "value": max_value},
],
}
width, x = gauge_grid(idx)
panels.append(
gauge_panel(
panel_id,
title,
expr,
{"h": 5, "w": 4, "x": 4 * idx, "y": 0},
{"h": 5, "w": width, "x": x, "y": 0},
min_value=min_value,
max_value=max_value,
thresholds=thresholds,
@ -662,7 +674,7 @@ def build_overview():
14,
"Worker node CPU",
node_cpu_expr(worker_filter),
{"h": 8, "w": 12, "x": 0, "y": 25},
{"h": 8, "w": 12, "x": 0, "y": 32},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -676,7 +688,7 @@ def build_overview():
15,
"Worker node RAM",
node_mem_expr(worker_filter),
{"h": 8, "w": 12, "x": 12, "y": 25},
{"h": 8, "w": 12, "x": 12, "y": 32},
unit="percent",
legend="{{node}}",
legend_calcs=["last"],
@ -691,7 +703,7 @@ def build_overview():
16,
"Control plane CPU",
node_cpu_expr(CONTROL_REGEX),
{"h": 7, "w": 12, "x": 0, "y": 33},
{"h": 7, "w": 12, "x": 0, "y": 40},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -703,7 +715,7 @@ def build_overview():
17,
"Control plane RAM",
node_mem_expr(CONTROL_REGEX),
{"h": 7, "w": 12, "x": 12, "y": 33},
{"h": 7, "w": 12, "x": 12, "y": 40},
unit="percent",
legend="{{node}}",
legend_display="table",
@ -716,7 +728,7 @@ def build_overview():
18,
"Cluster ingress throughput",
NET_INGRESS_EXPR,
{"h": 7, "w": 8, "x": 0, "y": 40},
{"h": 7, "w": 8, "x": 0, "y": 25},
unit="Bps",
legend="Ingress (Traefik)",
legend_display="list",
@ -729,7 +741,7 @@ def build_overview():
19,
"Cluster egress throughput",
NET_EGRESS_EXPR,
{"h": 7, "w": 8, "x": 8, "y": 40},
{"h": 7, "w": 8, "x": 8, "y": 25},
unit="Bps",
legend="Egress (Traefik)",
legend_display="list",
@ -742,7 +754,7 @@ def build_overview():
20,
"Intra-cluster throughput",
NET_INTERNAL_EXPR,
{"h": 7, "w": 8, "x": 16, "y": 40},
{"h": 7, "w": 8, "x": 16, "y": 25},
unit="Bps",
legend="Internal traffic",
legend_display="list",

View File

@ -140,7 +140,7 @@
},
"targets": [
{
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)",
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A"
}
],

View File

@ -17,7 +17,7 @@
},
"gridPos": {
"h": 5,
"w": 4,
"w": 5,
"x": 0,
"y": 0
},
@ -78,8 +78,8 @@
},
"gridPos": {
"h": 5,
"w": 4,
"x": 4,
"w": 5,
"x": 5,
"y": 0
},
"targets": [
@ -131,8 +131,8 @@
},
"gridPos": {
"h": 5,
"w": 4,
"x": 8,
"w": 5,
"x": 10,
"y": 0
},
"targets": [
@ -199,8 +199,8 @@
},
"gridPos": {
"h": 5,
"w": 4,
"x": 12,
"w": 5,
"x": 15,
"y": 0
},
"targets": [
@ -268,7 +268,7 @@
"gridPos": {
"h": 5,
"w": 4,
"x": 16,
"x": 20,
"y": 0
},
"targets": [
@ -1056,7 +1056,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 25
"y": 32
},
"targets": [
{
@ -1103,7 +1103,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 25
"y": 32
},
"targets": [
{
@ -1150,7 +1150,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 33
"y": 40
},
"targets": [
{
@ -1187,7 +1187,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 33
"y": 40
},
"targets": [
{
@ -1224,7 +1224,7 @@
"h": 7,
"w": 8,
"x": 0,
"y": 40
"y": 25
},
"targets": [
{
@ -1268,7 +1268,7 @@
"h": 7,
"w": 8,
"x": 8,
"y": 40
"y": 25
},
"targets": [
{
@ -1312,11 +1312,11 @@
"h": 7,
"w": 8,
"x": 16,
"y": 40
"y": 25
},
"targets": [
{
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)",
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A",
"legendFormat": "Internal traffic"
}

View File

@ -149,7 +149,7 @@ data:
},
"targets": [
{
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)",
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A"
}
],

View File

@ -26,7 +26,7 @@ data:
},
"gridPos": {
"h": 5,
"w": 4,
"w": 5,
"x": 0,
"y": 0
},
@ -87,8 +87,8 @@ data:
},
"gridPos": {
"h": 5,
"w": 4,
"x": 4,
"w": 5,
"x": 5,
"y": 0
},
"targets": [
@ -140,8 +140,8 @@ data:
},
"gridPos": {
"h": 5,
"w": 4,
"x": 8,
"w": 5,
"x": 10,
"y": 0
},
"targets": [
@ -208,8 +208,8 @@ data:
},
"gridPos": {
"h": 5,
"w": 4,
"x": 12,
"w": 5,
"x": 15,
"y": 0
},
"targets": [
@ -277,7 +277,7 @@ data:
"gridPos": {
"h": 5,
"w": 4,
"x": 16,
"x": 20,
"y": 0
},
"targets": [
@ -1065,7 +1065,7 @@ data:
"h": 8,
"w": 12,
"x": 0,
"y": 25
"y": 32
},
"targets": [
{
@ -1112,7 +1112,7 @@ data:
"h": 8,
"w": 12,
"x": 12,
"y": 25
"y": 32
},
"targets": [
{
@ -1159,7 +1159,7 @@ data:
"h": 7,
"w": 12,
"x": 0,
"y": 33
"y": 40
},
"targets": [
{
@ -1196,7 +1196,7 @@ data:
"h": 7,
"w": 12,
"x": 12,
"y": 33
"y": 40
},
"targets": [
{
@ -1233,7 +1233,7 @@ data:
"h": 7,
"w": 8,
"x": 0,
"y": 40
"y": 25
},
"targets": [
{
@ -1277,7 +1277,7 @@ data:
"h": 7,
"w": 8,
"x": 8,
"y": 40
"y": 25
},
"targets": [
{
@ -1321,11 +1321,11 @@ data:
"h": 7,
"w": 8,
"x": 16,
"y": 40
"y": 25
},
"targets": [
{
"expr": "clamp_min((sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) - (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)), 0)",
"expr": "clamp_min(((sum(rate(container_network_receive_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace!=\"\",pod!=\"\",container!=\"\"}[5m])) or on() vector(0))) - ((sum(rate(container_network_receive_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0)) + (sum(rate(container_network_transmit_bytes_total{namespace=\"traefik\",pod=~\"traefik-.*\"}[5m])) or on() vector(0))), 0)",
"refId": "A",
"legendFormat": "Internal traffic"
}