From 1c6c3992cfc2c7d29fd3f147cf6ef5733a2f42cf Mon Sep 17 00:00:00 2001 From: jenkins Date: Wed, 20 May 2026 13:26:33 -0300 Subject: [PATCH] monitoring(testing): reduce month-range query cost --- scripts/dashboards_render_atlas.py | 21 ++++++++++--------- scripts/tests/test_dashboards_render_atlas.py | 3 +++ .../monitoring/dashboards/atlas-testing.json | 6 +++--- .../monitoring/grafana-dashboard-testing.yaml | 6 +++--- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 09b98756..37ee84f0 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -77,11 +77,12 @@ def deduped_counter_increase(selector: str, window: str, step: str = "1m") -> st return f"increase((max without(instance, job) ({selector}))[{window}:{step}])" -def platform_runs_increase(label_selector: str, window: str) -> str: +def platform_runs_increase(label_selector: str, window: str, step: str = "1m") -> str: """Return a scrape-deduped increase for platform quality run gauges.""" return deduped_counter_increase( f"platform_quality_gate_runs_total{{{label_selector}}}", window, + step, ) PERCENT_THRESHOLDS = { @@ -608,10 +609,10 @@ PLATFORM_TEST_SOURCE_LINES_OVER_500_ROLLUP = ( ) PLATFORM_TEST_SONAR_HEALTH_ROLLUP = "platform_quality:sonar_gate_health_percent:latest_1h" PLATFORM_TEST_SUCCESS_EVENTS_30D = ( - f'(sum({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",status=~\"{PLATFORM_TEST_SUCCESS_STATUS}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d")}) or on() vector(0))' + f'(sum({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",status=~\"{PLATFORM_TEST_SUCCESS_STATUS}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d", "15m")}) or on() vector(0))' ) PLATFORM_TEST_TOTAL_EVENTS_30D = ( - f'(sum({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d")}) or on() vector(0))' + f'(sum({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d", "15m")}) or on() vector(0))' ) PLATFORM_TEST_SUCCESS_EVENTS_7D = ( f'(sum({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",status=~\"{PLATFORM_TEST_SUCCESS_STATUS}\",{PLATFORM_TEST_EXPORT_FILTER}", "7d")}) or on() vector(0))' @@ -641,7 +642,7 @@ PLATFORM_TEST_FAILURES_24H_BY_SUITE = ( f'sort_desc(sum by (suite) ({PLATFORM_TEST_RUNS_24H_ROLLUP}{{suite=~"{PLATFORM_TEST_SUITE_MATCHER}",status!~"{PLATFORM_TEST_SUCCESS_STATUS}"}}))' ) PLATFORM_TEST_ACTIVITY_30D = ( - f'sum by (suite, status) ({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d")})' + f'sum by (suite, status) ({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d", "15m")})' ) PLATFORM_TEST_RUNS_24H_TOTAL = PLATFORM_TEST_TOTAL_EVENTS_24H PLATFORM_TEST_ACTIVE_SUITES_24H = ( @@ -741,7 +742,7 @@ PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE = ( f'/ clamp_min((sum by (suite) ({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "24h")})), 1))' ) QUALITY_GATE_SUITE_INDEX_30D = ( - f'sum by (suite) ({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d")})' + f'sum by (suite) ({platform_runs_increase(f"suite=~\"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}\",{PLATFORM_TEST_EXPORT_FILTER}", "30d", "15m")})' ) QUALITY_GATE_COVERAGE_BY_SUITE = ( f'max by (suite) ({PLATFORM_TEST_COVERAGE_ROLLUP}{{suite=~"{PLATFORM_TEST_SUITE_CANONICAL_MATCHER}"}})' @@ -3942,10 +3943,10 @@ def build_jobs_dashboard(): f'branch=~"{branch_var}",status!~"{success}"}}' ) runs_24h = f'(sum({runs_24h_rollup_selector}) or on() vector(0))' - runs_30d = f'(sum({platform_runs_increase(runs_selector, "30d")}) or on() vector(0))' + runs_30d = f'(sum({platform_runs_increase(runs_selector, "30d", "15m")}) or on() vector(0))' success_24h = f'(sum({runs_24h_success_rollup_selector}) or on() vector(0))' success_30d = ( - f'(sum({platform_runs_increase(runs_success_selector, "30d")}) or on() vector(0))' + f'(sum({platform_runs_increase(runs_success_selector, "30d", "15m")}) or on() vector(0))' ) failures_24h = f'(sum({runs_24h_failure_rollup_selector}) or on() vector(0))' success_rate_24h = f"100 * ({success_24h}) / clamp_min(({runs_24h}), 1)" @@ -4110,13 +4111,13 @@ def build_jobs_dashboard(): f'category=~"{PLATFORM_TEST_CATEGORY_REGEX}"}})' ) recent_branch_evidence = ( - f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d])))' + f'sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d:15m])))' ) non_primary_branch_evidence = ( - f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector},branch!~"main|master|origin/main|origin/master|unknown"}}[30d]))' + f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector},branch!~"main|master|origin/main|origin/master|unknown"}}[30d:15m]))' ) branch_evidence_by_suite = ( - f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d]))' + f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d:15m]))' ) primary_branch_clean_by_suite = ( f'(100 * ((({branch_evidence_by_suite}) > bool 0) ' diff --git a/scripts/tests/test_dashboards_render_atlas.py b/scripts/tests/test_dashboards_render_atlas.py index 069d1043..5cfb9339 100644 --- a/scripts/tests/test_dashboards_render_atlas.py +++ b/scripts/tests/test_dashboards_render_atlas.py @@ -262,6 +262,7 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability(): suite_freshness_expr = panels_by_title["Suite Freshness (24h)"]["targets"][0]["expr"] assert "platform_quality:suite_runs:increase_24h" in suite_freshness_expr assert "max_over_time(platform_quality_gate_runs_total" not in suite_freshness_expr + assert "[30d:15m]" in panels_by_title["CI Run Success Rate (30d)"]["targets"][0]["expr"] assert panels_by_title["Latest Gate Health by Suite"]["gridPos"]["w"] == 6 assert panels_by_title["CI Run Success by Suite (24h)"]["gridPos"]["w"] == 6 assert panels_by_title["Coverage by Suite (Latest, gate 95)"]["gridPos"] == {"h": 7, "w": 6, "x": 12, "y": 4} @@ -437,6 +438,8 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint(): recent_branch_panel = nested_panels_by_title["Recent Branch Evidence by Suite (30d)"] assert branch_panel["gridPos"]["x"] == 12 assert recent_branch_panel["gridPos"]["x"] == 18 + assert "[30d:15m]" in recent_branch_panel["targets"][0]["expr"] + assert "[30d:15m]" in branch_panel["targets"][0]["expr"] assert branch_panel["fieldConfig"]["defaults"]["unit"] == "percent" assert "unless on(suite)" in branch_panel["targets"][0]["expr"] assert "> bool 0" in branch_panel["targets"][0]["expr"] diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index b33275f1..ca929df3 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -170,7 +170,7 @@ }, "targets": [ { - "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}))[30d:1m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[30d:1m])) or on() vector(0))), 1)", + "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}))[30d:15m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[30d:15m])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -3960,7 +3960,7 @@ }, "targets": [ { - "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", + "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d:15m]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m]))) > bool 0)))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -4160,7 +4160,7 @@ }, "targets": [ { - "expr": "sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d])))", + "expr": "sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m])))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{branch}}", "instant": true diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 470b3320..0c5ee8fd 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -179,7 +179,7 @@ data: }, "targets": [ { - "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}))[30d:1m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[30d:1m])) or on() vector(0))), 1)", + "expr": "100 * ((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}))[30d:15m])) or on() vector(0))) / clamp_min(((sum(increase((max without(instance, job) (platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}))[30d:15m])) or on() vector(0))), 1)", "refId": "A", "instant": true } @@ -3969,7 +3969,7 @@ data: }, "targets": [ { - "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", + "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d:15m]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m]))) > bool 0)))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -4169,7 +4169,7 @@ data: }, "targets": [ { - "expr": "sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d])))", + "expr": "sort_desc(count by (suite, branch) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d:15m])))", "refId": "A", "legendFormat": "{{suite}} \u00b7 {{branch}}", "instant": true