diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 5ac7d16c..b34d70a1 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -446,29 +446,32 @@ PLATFORM_TEST_ACTIVITY_30D = ( 'or label_replace(sum by (status) (increase(metis_flashes_total[30d])), "source", "metis-flash", "__name__", ".*") ' 'or label_replace(sum by (status) (increase(ananke_quality_gate_runs_total{suite="ananke"}[30d])), "source", "ananke-quality", "__name__", ".*")' ) -ARIADNE_SUITE_OK_INTERVAL = 'sum(increase(ariadne_task_runs_total{status="ok"}[$__interval]))' -ARIADNE_SUITE_TOTAL_INTERVAL = 'sum(increase(ariadne_task_runs_total[$__interval]))' +PLATFORM_TEST_ROLLING_WINDOW = "30d" +ARIADNE_SUITE_OK_INTERVAL = f'sum(increase(ariadne_task_runs_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))' +ARIADNE_SUITE_TOTAL_INTERVAL = f'sum(increase(ariadne_task_runs_total[{PLATFORM_TEST_ROLLING_WINDOW}]))' METIS_SUITE_OK_INTERVAL = ( - '(sum(increase(metis_builds_total{status="ok"}[$__interval])) + ' - 'sum(increase(metis_flashes_total{status="ok"}[$__interval])))' + f'(sum(increase(metis_builds_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}])) + ' + f'sum(increase(metis_flashes_total{{status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}])))' ) METIS_SUITE_TOTAL_INTERVAL = ( - '(sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))' + f'(sum(increase(metis_builds_total[{PLATFORM_TEST_ROLLING_WINDOW}])) + ' + f'sum(increase(metis_flashes_total[{PLATFORM_TEST_ROLLING_WINDOW}])))' +) +ANANKE_SUITE_OK_INTERVAL = ( + f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke",status="ok"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))' +) +ANANKE_SUITE_TOTAL_INTERVAL = ( + f'sum(increase(ananke_quality_gate_runs_total{{suite="ananke"}}[{PLATFORM_TEST_ROLLING_WINDOW}]))' ) -ANANKE_SUITE_OK_INTERVAL = 'sum(increase(ananke_quality_gate_runs_total{suite="ananke",status="ok"}[$__interval]))' -ANANKE_SUITE_TOTAL_INTERVAL = 'sum(increase(ananke_quality_gate_runs_total{suite="ananke"}[$__interval]))' PLATFORM_TEST_SUCCESS_RATE_ARIADNE_SERIES = ( - f'(100 * ({ARIADNE_SUITE_OK_INTERVAL}) / ({ARIADNE_SUITE_TOTAL_INTERVAL})) ' - f'and on() (({ARIADNE_SUITE_TOTAL_INTERVAL}) > 0)' + f'100 * ({ARIADNE_SUITE_OK_INTERVAL}) / clamp_min(({ARIADNE_SUITE_TOTAL_INTERVAL}), 1)' ) PLATFORM_TEST_SUCCESS_RATE_METIS_SERIES = ( - f'(100 * ({METIS_SUITE_OK_INTERVAL}) / ({METIS_SUITE_TOTAL_INTERVAL})) ' - f'and on() (({METIS_SUITE_TOTAL_INTERVAL}) > 0)' + f'100 * ({METIS_SUITE_OK_INTERVAL}) / clamp_min(({METIS_SUITE_TOTAL_INTERVAL}), 1)' ) PLATFORM_TEST_SUCCESS_RATE_ANANKE_SERIES = ( - f'(100 * ({ANANKE_SUITE_OK_INTERVAL}) / ({ANANKE_SUITE_TOTAL_INTERVAL})) ' - f'and on() (({ANANKE_SUITE_TOTAL_INTERVAL}) > 0)' + f'100 * ({ANANKE_SUITE_OK_INTERVAL}) / clamp_min(({ANANKE_SUITE_TOTAL_INTERVAL}), 1)' ) PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS = [ @@ -1507,7 +1510,7 @@ def build_overview(): test_success["fieldConfig"]["defaults"]["min"] = 0 test_success["fieldConfig"]["defaults"]["max"] = 100 test_success["description"] = ( - "Application-level rolling pass rate (0-100). One line per suite (ariadne, metis, ananke); idle windows are left blank rather than forced to 0%." + "Application-level rolling pass rate (0-100) over the last 30 days. One line per suite (ariadne, metis, ananke)." ) panels.append(test_success) test_failures = stat_panel( @@ -2986,7 +2989,7 @@ def build_jobs_dashboard(): suite_panel["fieldConfig"]["defaults"]["min"] = 0 suite_panel["fieldConfig"]["defaults"]["max"] = 100 suite_panel["description"] = ( - "Application-level pass percentage over time. One series per suite: ariadne, metis, ananke." + "Application-level rolling pass percentage over the last 30 days. One series per suite: ariadne, metis, ananke." ) panels.append(suite_panel) diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 0a3da12c..492e0574 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -1253,17 +1253,17 @@ "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / (sum(increase(ariadne_task_runs_total[$__interval])))) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / ((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval]))))) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", + "expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])))) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)", "legendFormat": "ananke" } ], @@ -1284,7 +1284,7 @@ "mode": "multi" } }, - "description": "Application-level pass percentage over time. One series per suite: ariadne, metis, ananke." + "description": "Application-level rolling pass percentage over the last 30 days. One series per suite: ariadne, metis, ananke." } ], "time": { diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index f4552a3a..fad82534 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -1820,17 +1820,17 @@ "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / (sum(increase(ariadne_task_runs_total[$__interval])))) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / ((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval]))))) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", + "expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])))) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)", "legendFormat": "ananke" } ], @@ -1861,7 +1861,7 @@ "targetBlank": true } ], - "description": "Application-level rolling pass rate (0-100). One line per suite (ariadne, metis, ananke); idle windows are left blank rather than forced to 0%." + "description": "Application-level rolling pass rate (0-100) over the last 30 days. One line per suite (ariadne, metis, ananke)." }, { "id": 47, diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index fc825a3a..a7979509 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -1262,17 +1262,17 @@ data: "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / (sum(increase(ariadne_task_runs_total[$__interval])))) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / ((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval]))))) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", + "expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])))) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)", "legendFormat": "ananke" } ], @@ -1293,7 +1293,7 @@ data: "mode": "multi" } }, - "description": "Application-level pass percentage over time. One series per suite: ariadne, metis, ananke." + "description": "Application-level rolling pass percentage over the last 30 days. One series per suite: ariadne, metis, ananke." } ], "time": { diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index c58c47f0..a803c9a3 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -1829,17 +1829,17 @@ data: "targets": [ { "refId": "A", - "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[$__interval]))) / (sum(increase(ariadne_task_runs_total[$__interval])))) and on() ((sum(increase(ariadne_task_runs_total[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[30d]))) / clamp_min((sum(increase(ariadne_task_runs_total[30d]))), 1)", "legendFormat": "ariadne" }, { "refId": "B", - "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[$__interval])) + sum(increase(metis_flashes_total{status=\"ok\"}[$__interval])))) / ((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval]))))) and on() (((sum(increase(metis_builds_total[$__interval])) + sum(increase(metis_flashes_total[$__interval])))) > 0)", + "expr": "100 * ((sum(increase(metis_builds_total{status=\"ok\"}[30d])) + sum(increase(metis_flashes_total{status=\"ok\"}[30d])))) / clamp_min(((sum(increase(metis_builds_total[30d])) + sum(increase(metis_flashes_total[30d])))), 1)", "legendFormat": "metis" }, { "refId": "C", - "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[$__interval]))) / (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval])))) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[$__interval]))) > 0)", + "expr": "100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[30d]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[30d]))), 1)", "legendFormat": "ananke" } ], @@ -1870,7 +1870,7 @@ data: "targetBlank": true } ], - "description": "Application-level rolling pass rate (0-100). One line per suite (ariadne, metis, ananke); idle windows are left blank rather than forced to 0%." + "description": "Application-level rolling pass rate (0-100) over the last 30 days. One line per suite (ariadne, metis, ananke)." }, { "id": 47,