monitoring: tighten overview stats

This commit is contained in:
Brad Stein 2025-11-17 19:24:03 -03:00
parent 349d9c56ac
commit fe8deea9c7
5 changed files with 121 additions and 48 deletions

View File

@ -221,7 +221,6 @@ def stat_panel(
thresholds=None, thresholds=None,
text_mode="value", text_mode="value",
legend=None, legend=None,
display_name=None,
value_suffix=None, value_suffix=None,
links=None, links=None,
): ):
@ -242,8 +241,6 @@ def stat_panel(
} }
if value_suffix: if value_suffix:
defaults["custom"]["valueSuffix"] = value_suffix defaults["custom"]["valueSuffix"] = value_suffix
if display_name:
defaults["displayName"] = display_name
panel = { panel = {
"id": panel_id, "id": panel_id,
"type": "stat", "type": "stat",
@ -385,7 +382,7 @@ def build_overview():
(1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None), (1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None),
( (
2, 2,
"Ready nodes", "Ready workers",
f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})', f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
WORKER_SUFFIX, WORKER_SUFFIX,
WORKER_TOTAL, WORKER_TOTAL,
@ -426,20 +423,32 @@ def build_overview():
] ]
for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats): for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats):
thresholds = None thresholds = None
if panel_id in (2, 3): if panel_id == 2:
thresholds = { thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "red", "value": None}, {"color": "red", "value": None},
{"color": "green", "value": ok_value}, {"color": "orange", "value": WORKER_TOTAL - 2},
{"color": "yellow", "value": WORKER_TOTAL - 1},
{"color": "green", "value": WORKER_TOTAL},
], ],
} }
elif panel_id >= 4: elif panel_id == 3:
thresholds = {
"mode": "absolute",
"steps": [
{"color": "red", "value": None},
{"color": "green", "value": CONTROL_TOTAL},
],
}
elif panel_id in (4, 5, 6):
thresholds = { thresholds = {
"mode": "absolute", "mode": "absolute",
"steps": [ "steps": [
{"color": "green", "value": None}, {"color": "green", "value": None},
{"color": "red", "value": 1}, {"color": "yellow", "value": 1},
{"color": "orange", "value": 2},
{"color": "red", "value": 3},
], ],
} }
panels.append( panels.append(
@ -470,7 +479,7 @@ def build_overview():
unit=unit, unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None, thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
text_mode="value_and_name", text_mode="value_and_name",
display_name="{{node}}", legend="{{node}}",
links=link_to("atlas-nodes"), links=link_to("atlas-nodes"),
) )
) )
@ -1006,7 +1015,7 @@ def build_network_dashboard():
f"topk(1, {TRAEFIK_ROUTER_EXPR})", f"topk(1, {TRAEFIK_ROUTER_EXPR})",
{"h": 4, "w": 8, "x": 16, "y": 0}, {"h": 4, "w": 8, "x": 16, "y": 0},
unit="req/s", unit="req/s",
display_name="{{router}}", legend="{{router}}",
) )
) )
panels.append( panels.append(

View File

@ -141,7 +141,8 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))", "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{router}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -166,8 +167,7 @@
"unit": "req/s", "unit": "req/s",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{router}}"
}, },
"overrides": [] "overrides": []
}, },

View File

@ -70,7 +70,7 @@
{ {
"id": 2, "id": 2,
"type": "stat", "type": "stat",
"title": "Ready nodes", "title": "Ready workers",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -100,6 +100,14 @@
"color": "red", "color": "red",
"value": null "value": null
}, },
{
"color": "orange",
"value": 16
},
{
"color": "yellow",
"value": 17
},
{ {
"color": "green", "color": "green",
"value": 18 "value": 18
@ -223,8 +231,16 @@
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -290,8 +306,16 @@
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -357,8 +381,16 @@
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -407,7 +439,8 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -436,8 +469,7 @@
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -479,7 +511,8 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -508,8 +541,7 @@
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -551,7 +583,8 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -576,8 +609,7 @@
"unit": "Bps", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -619,7 +651,8 @@
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -644,8 +677,7 @@
"unit": "Bps", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },

View File

@ -150,7 +150,8 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))", "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{router}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -175,8 +176,7 @@ data:
"unit": "req/s", "unit": "req/s",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{router}}"
}, },
"overrides": [] "overrides": []
}, },

View File

@ -79,7 +79,7 @@ data:
{ {
"id": 2, "id": 2,
"type": "stat", "type": "stat",
"title": "Ready nodes", "title": "Ready workers",
"datasource": { "datasource": {
"type": "prometheus", "type": "prometheus",
"uid": "atlas-vm" "uid": "atlas-vm"
@ -109,6 +109,14 @@ data:
"color": "red", "color": "red",
"value": null "value": null
}, },
{
"color": "orange",
"value": 16
},
{
"color": "yellow",
"value": 17
},
{ {
"color": "green", "color": "green",
"value": 18 "value": 18
@ -232,8 +240,16 @@ data:
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -299,8 +315,16 @@ data:
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -366,8 +390,16 @@ data:
"value": null "value": null
}, },
{ {
"color": "red", "color": "yellow",
"value": 1 "value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
} }
] ]
}, },
@ -416,7 +448,8 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -445,8 +478,7 @@ data:
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -488,7 +520,8 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -517,8 +550,7 @@ data:
"unit": "percent", "unit": "percent",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -560,7 +592,8 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -585,8 +618,7 @@ data:
"unit": "Bps", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },
@ -628,7 +660,8 @@ data:
"targets": [ "targets": [
{ {
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A" "refId": "A",
"legendFormat": "{{node}}"
} }
], ],
"fieldConfig": { "fieldConfig": {
@ -653,8 +686,7 @@ data:
"unit": "Bps", "unit": "Bps",
"custom": { "custom": {
"displayMode": "auto" "displayMode": "auto"
}, }
"displayName": "{{node}}"
}, },
"overrides": [] "overrides": []
}, },