From 3bbd0a6f90ce24226a0c25cd33e5918f8ed52955 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Mon, 13 Apr 2026 14:29:44 -0300 Subject: [PATCH] monitoring(jenkins): dedupe weather metrics and cap newest list rows --- scripts/dashboards_render_atlas.py | 15 ++++++++++++--- services/monitoring/dashboards/atlas-jobs.json | 8 ++++---- .../monitoring/dashboards/atlas-overview.json | 16 ++++++++-------- services/monitoring/grafana-dashboard-jobs.yaml | 8 ++++---- .../monitoring/grafana-dashboard-overview.yaml | 16 ++++++++-------- 5 files changed, 36 insertions(+), 27 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 880e58ac..e4e3a35f 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -496,11 +496,20 @@ JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS = ( JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS = ( "(time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600" ) +JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB = ( + f"max by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_STATUS})" +) +JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS_BY_JOB = ( + f"min by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS})" +) +JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS_BY_JOB = ( + f"min by (exported_job,job_url,weather_icon) ({JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS})" +) JENKINS_BUILD_WEATHER_LAST_SUCCESS_NEWEST_6_HOURS = ( - f"sort(bottomk(6, {JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS}))" + f"sort(bottomk(6, {JENKINS_BUILD_WEATHER_LAST_SUCCESS_AGE_HOURS_BY_JOB}))" ) JENKINS_BUILD_WEATHER_LAST_FAILURE_NEWEST_6_HOURS = ( - f"sort(bottomk(6, {JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS}))" + f"sort(bottomk(6, {JENKINS_BUILD_WEATHER_LAST_FAILURE_AGE_HOURS_BY_JOB}))" ) JENKINS_BUILD_WEATHER_LAST_DURATION_MINUTES = ( "ariadne_jenkins_build_weather_job_last_duration_seconds / 60" @@ -1394,7 +1403,7 @@ def bargauge_panel( def _jenkins_weather_status_expr(base_expr, comparator): return ( f"({base_expr}) and on(exported_job,job_url,weather_icon) " - f"({JENKINS_BUILD_WEATHER_LAST_STATUS} {comparator})" + f"({JENKINS_BUILD_WEATHER_LAST_STATUS_BY_JOB} {comparator})" ) diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 0f6014c5..21053b59 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -1551,25 +1551,25 @@ "targets": [ { "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index f0a75a9c..a7af2f69 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2298,25 +2298,25 @@ "targets": [ { "refId": "A", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } @@ -2453,25 +2453,25 @@ "targets": [ { "refId": "A", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index 023d918e..68e75bd7 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -1560,25 +1560,25 @@ data: "targets": [ { "refId": "A", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "((time() - ariadne_jenkins_build_weather_job_last_run_timestamp_seconds) / 3600) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 22b8f64c..0a2ee1cc 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2307,25 +2307,25 @@ data: "targets": [ { "refId": "A", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_success_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true } @@ -2462,25 +2462,25 @@ data: "targets": [ { "refId": "A", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 1)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 1)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "B", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "C", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status == 2)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) == 2)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }, { "refId": "D", - "expr": "(sort(bottomk(6, (time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600))) and on(exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status < 0)", + "expr": "(sort(bottomk(6, min by (exported_job,job_url,weather_icon) ((time() - ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds) / 3600)))) and on(exported_job,job_url,weather_icon) (max by (exported_job,job_url,weather_icon) (ariadne_jenkins_build_weather_job_last_status) < 0)", "legendFormat": "{{weather_icon}} {{exported_job}}", "instant": true }