monitoring: tighten overview stats

This commit is contained in:
Brad Stein 2025-11-17 19:24:03 -03:00
parent 349d9c56ac
commit fe8deea9c7
5 changed files with 121 additions and 48 deletions

View File

@ -221,7 +221,6 @@ def stat_panel(
thresholds=None,
text_mode="value",
legend=None,
display_name=None,
value_suffix=None,
links=None,
):
@ -242,8 +241,6 @@ def stat_panel(
}
if value_suffix:
defaults["custom"]["valueSuffix"] = value_suffix
if display_name:
defaults["displayName"] = display_name
panel = {
"id": panel_id,
"type": "stat",
@ -385,7 +382,7 @@ def build_overview():
(1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None),
(
2,
"Ready nodes",
"Ready workers",
f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})',
WORKER_SUFFIX,
WORKER_TOTAL,
@ -426,20 +423,32 @@ def build_overview():
]
for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats):
thresholds = None
if panel_id in (2, 3):
if panel_id == 2:
thresholds = {
"mode": "absolute",
"steps": [
{"color": "red", "value": None},
{"color": "green", "value": ok_value},
{"color": "orange", "value": WORKER_TOTAL - 2},
{"color": "yellow", "value": WORKER_TOTAL - 1},
{"color": "green", "value": WORKER_TOTAL},
],
}
elif panel_id >= 4:
elif panel_id == 3:
thresholds = {
"mode": "absolute",
"steps": [
{"color": "red", "value": None},
{"color": "green", "value": CONTROL_TOTAL},
],
}
elif panel_id in (4, 5, 6):
thresholds = {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "red", "value": 1},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 2},
{"color": "red", "value": 3},
],
}
panels.append(
@ -470,7 +479,7 @@ def build_overview():
unit=unit,
thresholds=PERCENT_THRESHOLDS if unit == "percent" else None,
text_mode="value_and_name",
display_name="{{node}}",
legend="{{node}}",
links=link_to("atlas-nodes"),
)
)
@ -1006,7 +1015,7 @@ def build_network_dashboard():
f"topk(1, {TRAEFIK_ROUTER_EXPR})",
{"h": 4, "w": 8, "x": 16, "y": 0},
unit="req/s",
display_name="{{router}}",
legend="{{router}}",
)
)
panels.append(

View File

@ -141,7 +141,8 @@
"targets": [
{
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{router}}"
}
],
"fieldConfig": {
@ -166,8 +167,7 @@
"unit": "req/s",
"custom": {
"displayMode": "auto"
},
"displayName": "{{router}}"
}
},
"overrides": []
},

View File

@ -70,7 +70,7 @@
{
"id": 2,
"type": "stat",
"title": "Ready nodes",
"title": "Ready workers",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -100,6 +100,14 @@
"color": "red",
"value": null
},
{
"color": "orange",
"value": 16
},
{
"color": "yellow",
"value": 17
},
{
"color": "green",
"value": 18
@ -223,8 +231,16 @@
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -290,8 +306,16 @@
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -357,8 +381,16 @@
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -407,7 +439,8 @@
"targets": [
{
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -436,8 +469,7 @@
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -479,7 +511,8 @@
"targets": [
{
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -508,8 +541,7 @@
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -551,7 +583,8 @@
"targets": [
{
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -576,8 +609,7 @@
"unit": "Bps",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -619,7 +651,8 @@
"targets": [
{
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -644,8 +677,7 @@
"unit": "Bps",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},

View File

@ -150,7 +150,8 @@ data:
"targets": [
{
"expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{router}}"
}
],
"fieldConfig": {
@ -175,8 +176,7 @@ data:
"unit": "req/s",
"custom": {
"displayMode": "auto"
},
"displayName": "{{router}}"
}
},
"overrides": []
},

View File

@ -79,7 +79,7 @@ data:
{
"id": 2,
"type": "stat",
"title": "Ready nodes",
"title": "Ready workers",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -109,6 +109,14 @@ data:
"color": "red",
"value": null
},
{
"color": "orange",
"value": 16
},
{
"color": "yellow",
"value": 17
},
{
"color": "green",
"value": 18
@ -232,8 +240,16 @@ data:
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -299,8 +315,16 @@ data:
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -366,8 +390,16 @@ data:
"value": null
},
{
"color": "red",
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 2
},
{
"color": "red",
"value": 3
}
]
},
@ -416,7 +448,8 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -445,8 +478,7 @@ data:
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -488,7 +520,8 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -517,8 +550,7 @@ data:
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -560,7 +592,8 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -585,8 +618,7 @@ data:
"unit": "Bps",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},
@ -628,7 +660,8 @@ data:
"targets": [
{
"expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))",
"refId": "A"
"refId": "A",
"legendFormat": "{{node}}"
}
],
"fieldConfig": {
@ -653,8 +686,7 @@ data:
"unit": "Bps",
"custom": {
"displayMode": "auto"
},
"displayName": "{{node}}"
}
},
"overrides": []
},