From 4bcb1cc940eb5fc9fabd117595f597a58291a3f5 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 13 Apr 2026 12:17:34 -0300 Subject: [PATCH] monitoring(overview): split jenkins weather into success/failure columns --- scripts/dashboards_render_atlas.py | 39 +++- .../monitoring/dashboards/atlas-overview.json | 189 ++++++++++++++++-- .../grafana-dashboard-overview.yaml | 189 ++++++++++++++++-- 3 files changed, 378 insertions(+), 39 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 1a0aa37d..dc4bd17c 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1509,6 +1509,8 @@ def jenkins_weather_statlist_panel( decimals=2, sort_order="asc", limit=12, + title_size=12, + value_size=12, links=None, description=None, ): @@ -1584,7 +1586,7 @@ def jenkins_weather_statlist_panel( "wideLayout": True, "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False}, "textMode": "name_and_value", - "text": {"titleSize": 12, "valueSize": 12}, + "text": {"titleSize": title_size, "valueSize": value_size}, }, "transformations": [{"id": "sortBy", "options": {"fields": ["Value"], "order": sort_order}}], } @@ -2104,7 +2106,7 @@ def build_overview(): 44, "One-off Job Pods (age hours)", ONEOFF_JOB_POD_AGE_HOURS, - {"h": 5, "w": 6, "x": 0, "y": 32}, + {"h": 5, "w": 8, "x": 0, "y": 32}, unit="h", instant=True, legend="{{namespace}}/{{pod}}", @@ -2180,18 +2182,33 @@ def build_overview(): panels.append( jenkins_weather_statlist_panel( 142, - "Jenkins Build Weather (last run h, newest first)", - JENKINS_BUILD_WEATHER_LAST_RUN_AGE_HOURS, - {"h": 5, "w": 12, "x": 6, "y": 32}, + "Jenkins Last Success (h, newest first)", + JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS, + {"h": 5, "w": 4, "x": 8, "y": 32}, unit="h", decimals=1, sort_order="asc", - limit=12, + limit=8, + title_size=10, + value_size=10, links=link_to("atlas-jobs"), - description=( - "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. " - "Click a name to open the Jenkins job page." - ), + description="Age since each job's most recent successful run; newest runs appear first.", + ) + ) + panels.append( + jenkins_weather_statlist_panel( + 243, + "Jenkins Last Failure (h, newest first)", + JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS, + {"h": 5, "w": 4, "x": 12, "y": 32}, + unit="h", + decimals=1, + sort_order="asc", + limit=8, + title_size=10, + value_size=10, + links=link_to("atlas-jobs"), + description="Age since each job's most recent failed run; newest failures appear first.", ) ) panels.append( @@ -2199,7 +2216,7 @@ def build_overview(): 47, "PVC Backup Health / Age", PVC_BACKUP_AGE_HOURS_BY_PVC, - {"h": 5, "w": 6, "x": 18, "y": 32}, + {"h": 5, "w": 8, "x": 16, "y": 32}, unit="h", instant=True, legend="{{namespace}}/{{pvc}}", diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index c1e6933d..3f2bd9ed 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2013,7 +2013,7 @@ }, "gridPos": { "h": 5, - "w": 6, + "w": 8, "x": 0, "y": 32 }, @@ -2284,39 +2284,39 @@ { "id": 142, "type": "stat", - "title": "Jenkins Build Weather (last run h, newest first)", + "title": "Jenkins Last Success (h, newest first)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 5, - "w": 12, - "x": 6, + "w": 4, + "x": 8, "y": 32 }, "targets": [ { "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } @@ -2412,8 +2412,8 @@ }, "textMode": "name_and_value", "text": { - "titleSize": 12, - "valueSize": 12 + "titleSize": 10, + "valueSize": 10 } }, "transformations": [ @@ -2429,7 +2429,7 @@ { "id": "limit", "options": { - "limit": 12 + "limit": 8 } } ], @@ -2440,7 +2440,168 @@ "targetBlank": true } ], - "description": "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. Click a name to open the Jenkins job page." + "description": "Age since each job's most recent successful run; newest runs appear first." + }, + { + "id": 243, + "type": "stat", + "title": "Jenkins Last Failure (h, newest first)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 32 + }, + "targets": [ + { + "refId": "A", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "B", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "C", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "D", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "decimals": 1, + "min": 0, + "links": [ + { + "title": "Open Jenkins job", + "url": "${__field.labels.job_url}", + "targetBlank": true + } + ] + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "green" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "red" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "yellow" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "gray" + } + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "left", + "orientation": "horizontal", + "wideLayout": true, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value", + "text": { + "titleSize": 10, + "valueSize": 10 + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + }, + { + "id": "limit", + "options": { + "limit": 8 + } + } + ], + "links": [ + { + "title": "Open atlas-jobs dashboard", + "url": "/d/atlas-jobs", + "targetBlank": true + } + ], + "description": "Age since each job's most recent failed run; newest failures appear first." }, { "id": 47, @@ -2452,8 +2613,8 @@ }, "gridPos": { "h": 5, - "w": 6, - "x": 18, + "w": 8, + "x": 16, "y": 32 }, "targets": [ diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 20d41f7e..4831b644 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2022,7 +2022,7 @@ data: }, "gridPos": { "h": 5, - "w": 6, + "w": 8, "x": 0, "y": 32 }, @@ -2293,39 +2293,39 @@ data: { "id": 142, "type": "stat", - "title": "Jenkins Build Weather (last run h, newest first)", + "title": "Jenkins Last Success (h, newest first)", "datasource": { "type": "prometheus", "uid": "atlas-vm" }, "gridPos": { "h": 5, - "w": 12, - "x": 6, + "w": 4, + "x": 8, "y": 32 }, "targets": [ { "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } @@ -2421,8 +2421,8 @@ data: }, "textMode": "name_and_value", "text": { - "titleSize": 12, - "valueSize": 12 + "titleSize": 10, + "valueSize": 10 } }, "transformations": [ @@ -2438,7 +2438,7 @@ data: { "id": "limit", "options": { - "limit": 12 + "limit": 8 } } ], @@ -2449,7 +2449,168 @@ data: "targetBlank": true } ], - "description": "Jenkins homepage-style list from Ariadne with weather icons and status-colored job names. Click a name to open the Jenkins job page." + "description": "Age since each job's most recent successful run; newest runs appear first." + }, + { + "id": 243, + "type": "stat", + "title": "Jenkins Last Failure (h, newest first)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 32 + }, + "targets": [ + { + "refId": "A", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "B", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "C", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + }, + { + "refId": "D", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "legendFormat": "{{weather_icon}} {{exported_job}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "decimals": 1, + "min": 0, + "links": [ + { + "title": "Open Jenkins job", + "url": "${__field.labels.job_url}", + "targetBlank": true + } + ] + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "green" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "red" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "yellow" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "D" + }, + "properties": [ + { + "id": "color", + "value": { + "mode": "fixed", + "fixedColor": "gray" + } + } + ] + } + ] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "left", + "orientation": "horizontal", + "wideLayout": true, + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "name_and_value", + "text": { + "titleSize": 10, + "valueSize": 10 + } + }, + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + }, + { + "id": "limit", + "options": { + "limit": 8 + } + } + ], + "links": [ + { + "title": "Open atlas-jobs dashboard", + "url": "/d/atlas-jobs", + "targetBlank": true + } + ], + "description": "Age since each job's most recent failed run; newest failures appear first." }, { "id": 47, @@ -2461,8 +2622,8 @@ data: }, "gridPos": { "h": 5, - "w": 6, - "x": 18, + "w": 8, + "x": 16, "y": 32 }, "targets": [