diff --git a/scripts/render_dashboards.py b/scripts/render_dashboards.py index 083ddfe..b88d5a4 100644 --- a/scripts/render_dashboards.py +++ b/scripts/render_dashboards.py @@ -221,7 +221,6 @@ def stat_panel( thresholds=None, text_mode="value", legend=None, - display_name=None, value_suffix=None, links=None, ): @@ -242,8 +241,6 @@ def stat_panel( } if value_suffix: defaults["custom"]["valueSuffix"] = value_suffix - if display_name: - defaults["displayName"] = display_name panel = { "id": panel_id, "type": "stat", @@ -385,7 +382,7 @@ def build_overview(): (1, "Running pods", 'sum(kube_pod_status_phase{phase="Running"})', None, None, None), ( 2, - "Ready nodes", + "Ready workers", f'sum(kube_node_status_condition{{condition="Ready",status="true",node=~"{WORKER_REGEX}"}})', WORKER_SUFFIX, WORKER_TOTAL, @@ -426,20 +423,32 @@ def build_overview(): ] for idx, (panel_id, title, expr, suffix, ok_value, links) in enumerate(row1_stats): thresholds = None - if panel_id in (2, 3): + if panel_id == 2: thresholds = { "mode": "absolute", "steps": [ {"color": "red", "value": None}, - {"color": "green", "value": ok_value}, + {"color": "orange", "value": WORKER_TOTAL - 2}, + {"color": "yellow", "value": WORKER_TOTAL - 1}, + {"color": "green", "value": WORKER_TOTAL}, ], } - elif panel_id >= 4: + elif panel_id == 3: + thresholds = { + "mode": "absolute", + "steps": [ + {"color": "red", "value": None}, + {"color": "green", "value": CONTROL_TOTAL}, + ], + } + elif panel_id in (4, 5, 6): thresholds = { "mode": "absolute", "steps": [ {"color": "green", "value": None}, - {"color": "red", "value": 1}, + {"color": "yellow", "value": 1}, + {"color": "orange", "value": 2}, + {"color": "red", "value": 3}, ], } panels.append( @@ -470,7 +479,7 @@ def build_overview(): unit=unit, thresholds=PERCENT_THRESHOLDS if unit == "percent" else None, text_mode="value_and_name", - display_name="{{node}}", + legend="{{node}}", links=link_to("atlas-nodes"), ) ) @@ -1006,7 +1015,7 @@ def build_network_dashboard(): f"topk(1, {TRAEFIK_ROUTER_EXPR})", {"h": 4, "w": 8, "x": 16, "y": 0}, unit="req/s", - display_name="{{router}}", + legend="{{router}}", ) ) panels.append( diff --git a/services/monitoring/dashboards/atlas-network.json b/services/monitoring/dashboards/atlas-network.json index 369024f..e412045 100644 --- a/services/monitoring/dashboards/atlas-network.json +++ b/services/monitoring/dashboards/atlas-network.json @@ -141,7 +141,8 @@ "targets": [ { "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{router}}" } ], "fieldConfig": { @@ -166,8 +167,7 @@ "unit": "req/s", "custom": { "displayMode": "auto" - }, - "displayName": "{{router}}" + } }, "overrides": [] }, diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index ec7a848..ec137f1 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -70,7 +70,7 @@ { "id": 2, "type": "stat", - "title": "Ready nodes", + "title": "Ready workers", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -100,6 +100,14 @@ "color": "red", "value": null }, + { + "color": "orange", + "value": 16 + }, + { + "color": "yellow", + "value": 17 + }, { "color": "green", "value": 18 @@ -223,8 +231,16 @@ "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -290,8 +306,16 @@ "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -357,8 +381,16 @@ "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -407,7 +439,8 @@ "targets": [ { "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -436,8 +469,7 @@ "unit": "percent", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -479,7 +511,8 @@ "targets": [ { "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -508,8 +541,7 @@ "unit": "percent", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -551,7 +583,8 @@ "targets": [ { "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -576,8 +609,7 @@ "unit": "Bps", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -619,7 +651,8 @@ "targets": [ { "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -644,8 +677,7 @@ "unit": "Bps", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, diff --git a/services/monitoring/grafana-dashboard-network.yaml b/services/monitoring/grafana-dashboard-network.yaml index 07c8b7a..6963e89 100644 --- a/services/monitoring/grafana-dashboard-network.yaml +++ b/services/monitoring/grafana-dashboard-network.yaml @@ -150,7 +150,8 @@ data: "targets": [ { "expr": "topk(1, sum by (router) (rate(traefik_router_requests_total[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{router}}" } ], "fieldConfig": { @@ -175,8 +176,7 @@ data: "unit": "req/s", "custom": { "displayMode": "auto" - }, - "displayName": "{{router}}" + } }, "overrides": [] }, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index bb3bb11..12555ee 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -79,7 +79,7 @@ data: { "id": 2, "type": "stat", - "title": "Ready nodes", + "title": "Ready workers", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -109,6 +109,14 @@ data: "color": "red", "value": null }, + { + "color": "orange", + "value": 16 + }, + { + "color": "yellow", + "value": 17 + }, { "color": "green", "value": 18 @@ -232,8 +240,16 @@ data: "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -299,8 +315,16 @@ data: "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -366,8 +390,16 @@ data: "value": null }, { - "color": "red", + "color": "yellow", "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 } ] }, @@ -416,7 +448,8 @@ data: "targets": [ { "expr": "topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m]))) * 100) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -445,8 +478,7 @@ data: "unit": "percent", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -488,7 +520,8 @@ data: "targets": [ { "expr": "topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\")))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -517,8 +550,7 @@ data: "unit": "percent", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -560,7 +592,8 @@ data: "targets": [ { "expr": "topk(1, avg by (node) (rate(container_network_receive_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m]) + rate(container_network_transmit_bytes_total{namespace!=\"\" ,pod!=\"\"}[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -585,8 +618,7 @@ data: "unit": "Bps", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] }, @@ -628,7 +660,8 @@ data: "targets": [ { "expr": "topk(1, avg by (node) (rate(node_disk_read_bytes_total[5m]) + rate(node_disk_written_bytes_total[5m])))", - "refId": "A" + "refId": "A", + "legendFormat": "{{node}}" } ], "fieldConfig": { @@ -653,8 +686,7 @@ data: "unit": "Bps", "custom": { "displayMode": "auto" - }, - "displayName": "{{node}}" + } }, "overrides": [] },