From c43f7d84e8f51156821b7fcefb0386f0ba400e3e Mon Sep 17 00:00:00 2001 From: jenkins Date: Sat, 16 May 2026 13:48:01 -0300 Subject: [PATCH] monitoring: polish testing dashboard telemetry --- scripts/dashboards_render_atlas.py | 200 ++++++- scripts/tests/test_dashboards_render_atlas.py | 34 ++ services/jenkins/configmap-jcasc.yaml | 3 + .../monitoring/dashboards/atlas-overview.json | 129 ++-- .../monitoring/dashboards/atlas-testing.json | 562 ++++++++++-------- .../grafana-dashboard-overview.yaml | 129 ++-- .../monitoring/grafana-dashboard-testing.yaml | 562 ++++++++++-------- 7 files changed, 980 insertions(+), 639 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index a6b54b78..fa542a71 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -1515,6 +1515,16 @@ def row_panel(panel_id, title, y, *, collapsed=True, panels=None): } +def apply_panel_descriptions(panels, descriptions): + """Attach concise help text to panels, including panels inside collapsed rows.""" + for panel in panels: + title = panel.get("title") + if title in descriptions and not panel.get("description"): + panel["description"] = descriptions[title] + if panel.get("panels"): + apply_panel_descriptions(panel["panels"], descriptions) + + DASHBOARD_LINK_TITLES = { "atlas-overview": "Open Atlas Overview", "atlas-pods": "Open Atlas Pods", @@ -1545,6 +1555,121 @@ def overview_link_to(uid): return [{"title": f"Open {uid} dashboard", "url": f"/d/{uid}", "targetBlank": True}] +OVERVIEW_PANEL_DESCRIPTIONS = { + "Control Plane Ready": "Control-plane nodes currently Ready; full count is good, lower means Kubernetes core capacity is missing.", + "Control Plane Workloads": "Non-core pods running on control-plane nodes; zero is good because control nodes should stay focused.", + "Stuck Terminating": "Pods that Kubernetes cannot finish deleting; zero is good, growth means cleanup or storage may be stuck.", + "Atlas Availability (365d)": "Rolling one-year Atlas availability; higher is better, below target means users saw downtime.", + "Problem Pods": "Pods in unhealthy phases; zero is good, any count means a workload needs attention.", + "CrashLoop / ImagePull": "Pods restarting or unable to pull images; zero is good, any count usually blocks a service.", + "Workers Ready": "Worker nodes currently Ready; full count is good, lower means less place to run services.", + "Hottest node: CPU": "Highest worker CPU load right now; lower is calmer, hot nodes may need pods moved.", + "Hottest node: RAM": "Highest worker memory use right now; lower is safer, high values risk evictions.", + "Hottest node: NET (rx+tx)": "Busiest node network rate; spikes can reveal traffic concentration or noisy services.", + "Hottest node: I/O (r+w)": "Busiest node disk I/O rate; high values can explain slow storage-backed apps.", + "Astreae Usage": "Percent of Astreae used; lower is safer, high values reduce storage headroom.", + "Asteria Usage": "Percent of Asteria used; lower is safer, high values reduce storage headroom.", + "Astreae Free": "Free space on Astreae; higher is better for backups and workload growth.", + "Asteria Free": "Free space on Asteria; higher is better for backups and workload growth.", + "Pyrphoros UPS Current": "Live Pyrphoros UPS draw and runtime; stable runtime means the lab can ride out short outages.", + "Statera UPS Current": "Live Statera UPS draw and runtime; stable runtime means the lab can ride out short outages.", + "UPS History (Power Draw)": "UPS power draw over time; steady draw is normal, spikes show sudden load changes.", + "Current Enclosure Temperature": "Current tent temperature in C and F; moderate values protect hardware and plants.", + "Current Enclosure Climate": "Current humidity and VPD; in-range values mean the enclosure climate is stable.", + "Enclosure Climate History": "Temperature, humidity, and VPD over time; smooth movement is healthy, sharp swings need attention.", + "Fan Intensity History": "Fan levels from Off to 10; warmer colors mean stronger cooling response and more thermal pressure.", + "Flux Source": "Git branch Flux is applying; this should normally be the intended production branch.", + "Run Reliability (24h)": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence.", + "Failed Runs (24h)": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy.", + "Fresh Suites (24h)": "Suites with at least one run in 24h; higher is better because stale suites hide failures.", + "Avg Coverage": "Average latest line coverage across suites; higher means code is better protected by tests.", + "LOC Clean Suites": "Suites with no source files over 500 LOC; full count is good for maintainability.", + "GitOps Health": "Flux readiness and suspension health over time; blue is perfect, warmer colors mean drift or pause.", + "One-off Job Pods (age hours)": "Temporary job pods by age; low or empty is good, old pods usually need cleanup.", + "Ariadne Run Volume": "Ariadne automation attempts and failures; attempts show activity, failures show work to investigate.", + "Gate Checks Passing by Suite": "Current quality gates passing per suite; blue means all gates pass, warmer colors mean blockers.", + "Jenkins Last Success (h, newest first)": "Age of recent Jenkins successes; lower is fresher and better.", + "Jenkins Last Failure (h, newest first)": "Age of recent Jenkins failures; lower means a failure happened more recently.", + "PVC Backup Health / Age": "Restic backup age by PVC; lower is better, very old backups mean restore risk.", + "Mail Sent (1d)": "Outbound mail sent in the last day; useful context for mail health and bounce rates.", + "Mail Bounces (1d)": "Outbound mail bounce rate and count; zero is best, high values risk delivery reputation.", + "Mail Success Rate (1d)": "Outbound mail success rate; higher is better for user notifications.", + "Mail Limit Used (30d)": "Postmark monthly send limit used; lower leaves more quota headroom.", + "Postgres Connections Used": "Current Postgres connections; lower leaves room for apps during spikes.", + "Postgres Hottest Connections": "Database with the most active connections; high values identify the pressure source.", + "Namespace CPU Share": "CPU share by namespace in the selected scope; big slices show who is using compute.", + "Namespace GPU Share": "GPU share by namespace in the selected scope; idle is good unless GPU work is expected.", + "Namespace RAM Share": "Memory share by namespace in the selected scope; big slices show who may drive pressure.", + "Worker Node CPU": "Worker CPU over time; lower is calmer, sustained high load may need rescheduling.", + "Worker Node RAM": "Worker memory over time; lower is safer, sustained high use risks evictions.", + "Control plane CPU": "Control-plane CPU over time; low steady usage means Kubernetes has control headroom.", + "Control plane RAM": "Control-plane memory over time; low steady usage means Kubernetes has control headroom.", + "Node Pod Share": "Share of pods per node; uneven share can reveal overloaded workers.", + "Top Nodes by Pod Count": "Nodes with the most pods; lower and balanced is easier to operate.", + "Cluster Ingress Throughput": "Traffic entering the cluster; spikes should line up with expected usage.", + "Cluster Egress Throughput": "Traffic leaving the cluster; spikes should line up with expected usage.", + "Intra-Cluster Throughput": "Traffic inside the cluster; high values can expose chatty services.", + "Root Filesystem Usage": "Node root disk usage; lower is safer, high values can break kubelet.", + "Nodes Closest to Full Astraios Disks": "Astraios disk fullness by node; lower is safer for storage reliability.", +} + + +TESTING_PANEL_DESCRIPTIONS = { + "Run Reliability (24h)": "Percent of selected CI runs that finished successfully in 24h; higher is better.", + "Run Reliability (30d)": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation.", + "Failed Runs (24h)": "Selected CI runs that failed in 24h; zero is good and anything else needs a look.", + "Runs (24h)": "Selected CI run count in 24h; zero means the dashboard may be stale.", + "Avg Coverage (%)": "Average latest line coverage for selected suites; higher means better test protection.", + "Suites with LOC >500": "Selected suites with oversized source files; zero is good for maintainability.", + "Current Gate Health by Suite": "Latest gate pass percent per suite; 100% means all required checks currently pass.", + "Run Reliability by Suite (24h)": "24h run success by suite; lower rows are worse and can lag after failed/debug runs.", + "Coverage by Suite (Latest, gate 95)": "Latest suite coverage; 95%+ is acceptable and 100% is strongest.", + "Files <=500 LOC by Suite (Latest)": "Percent of source files within the 500-line limit; higher is easier to maintain.", + "Reliability And Run History": "Recent run, coverage, LOC, and category trends for selected suites.", + "Run Reliability by Suite (7d rolling)": "Seven-day rolling run success by suite; blue lanes mean stable CI.", + "Test Category Pass Rate History": "Pass rate by test category; use the Suite filter to focus on one project.", + "Daily Run Volume (Selected Scope)": "Rolling daily counts of successful and failed runs; volume explains confidence.", + "Coverage History by Suite": "Coverage over time by suite; rising lines mean better test protection.", + "Files <=500 LOC History by Suite": "LOC compliance over time; blue lanes mean files stay within the size limit.", + "Check Failure Rates By Suite": "Failure percent by check family; blue is zero failures, warmer colors show blockers.", + "Tests Failure Rate": "Percent of test checks currently failing; blue means tests are clean.", + "Coverage Failure Rate": "Percent of coverage checks currently failing; blue means coverage gates pass.", + "LOC Failure Rate": "Percent of LOC checks currently failing; blue means file size gates pass.", + "Style Failure Rate": "Percent of style checks currently failing; blue means style/docs gates pass.", + "Gate Glue Failure Rate": "Percent of metric-contract checks failing; blue means dashboard telemetry is trustworthy.", + "SonarQube Failure Rate": "Percent of Sonar checks failing; blue means Sonar quality gates pass.", + "Supply Chain Failure Rate": "Percent of supply-chain checks failing; blue means artifact/image checks pass.", + "Check Healthy Rates By Suite": "Healthy percent by check family; blue means all selected checks are good.", + "Tests Healthy Rate": "Percent of test checks passing or not applicable; higher is better.", + "Coverage Healthy Rate": "Percent of coverage checks passing or not applicable; higher is better.", + "LOC Healthy Rate": "Percent of LOC checks passing or not applicable; higher is better.", + "Style Healthy Rate": "Percent of style checks passing or not applicable; higher is better.", + "Gate Glue Healthy Rate": "Percent of telemetry-contract checks passing; higher means cleaner reporting.", + "SonarQube Healthy Rate": "Percent of Sonar checks passing or not applicable; higher is better.", + "Supply Chain Healthy Rate": "Percent of supply-chain checks passing or not applicable; higher is better.", + "Test Drilldowns And Problem Tests": "Test-case detail for finding which tests are hurting reliability.", + "Problematic Tests Over Time (Top failures)": "Top tests failing in each hourly bucket; old totals may only appear in the 30d panel.", + "Most Problematic Test by Suite (30d)": "Worst test per suite summed over 30d; high counts can be historical debt.", + "Selected Test Pass/Fail History": "Hourly pass/fail/skipped volume for the selected test filter.", + "Selected Test Pass Rate History": "Pass rate history for the selected test filter; higher means the test is stable.", + "Telemetry Completeness And Branches": "Checks that each suite publishes the data this dashboard needs.", + "Tests Metrics Present by Suite": "Whether suite-level test counts are present; 100% means the suite is reporting.", + "Checks Metrics Present by Suite": "Whether gate check metrics are present; 100% means health panels have inputs.", + "Coverage Metrics Present by Suite": "Whether coverage metrics are present; 100% means coverage panels are reliable.", + "LOC Compliance Metrics Present by Suite": "Whether LOC metrics are present; 100% means size panels are reliable.", + "Test-Case Metrics Present by Suite": "Whether per-test metrics are present; 100% enables drilldowns.", + "Real Test Cases Present by Suite": "Whether real test names are present; 100% means not just placeholder telemetry.", + "Recent Branch Evidence by Suite (30d)": "Branches with recent CI evidence; unexpected branches can mean drift or stale work.", + "Primary Branch Clean by Suite (30d)": "Percent clean of non-primary branch evidence; 100% means only main/master is reporting.", + "SonarQube Project Health": "SonarQube availability, projects, fetch errors, and gate status.", + "SonarQube API Up": "Whether the SonarQube exporter can reach SonarQube; 1 is good.", + "Sonar Projects (Selected)": "Selected SonarQube project count; zero means Sonar is not tracking that suite.", + "Sonar Gate Fetch Errors": "Sonar exporter fetch errors; zero is good because stale Sonar data misleads.", + "Sonar Gate Status Mix (Selected)": "Mix of Sonar gate states; OK is good and non-OK needs cleanup.", + "Sonar Gate Health by Project": "Sonar gate health over time by project; blue means OK.", +} + + # --------------------------------------------------------------------------- # Dashboard builders # --------------------------------------------------------------------------- @@ -2651,6 +2776,7 @@ def build_overview(): links=overview_link("atlas-storage"), ) ) + apply_panel_descriptions(panels, OVERVIEW_PANEL_DESCRIPTIONS) return { "uid": "atlas-overview", "title": "Atlas Overview", @@ -3569,9 +3695,11 @@ def build_jobs_dashboard(): smell_selector = f'suite=~"{suite_var}",{exported}' build_info_selector = f'suite=~"{suite_var}",branch!="",branch=~"{branch_var}",{exported}' selected_suite_universe = ( - f'(count by (suite) (platform_quality_gate_build_info{{{build_info_selector}}}) >= bool 0)' + f'(count by (suite) (platform_quality_gate_build_info{{{build_info_selector}}}) >= bool 0) ' + f'or (count by (suite) (max_over_time(platform_quality_gate_runs_total{{{runs_selector}}}[30d])) >= bool 0)' ) - selected_suite_zero = f"(0 * {selected_suite_universe})" + selected_suite_zero = f"(0 * ({selected_suite_universe}))" + selected_suite_missing = f"(({selected_suite_zero}) - 1)" suite_universe = " or ".join( f'label_replace(vector(1), "suite", "{suite}", "__name__", ".*")' @@ -3596,9 +3724,9 @@ def build_jobs_dashboard(): f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_success_selector}}}[24h]))' ) success_rate_by_suite_24h = ( - f'sort_desc(((100 * ({success_by_suite_24h}) / clamp_min(({runs_by_suite_24h}), 1)) ' + f'((100 * ({success_by_suite_24h}) / clamp_min(({runs_by_suite_24h}), 1)) ' f'and on(suite) (({runs_by_suite_24h}) > 0)) ' - f'or on(suite) ((0 * ({runs_by_suite_24h})) - 1))' + f'or on(suite) ({selected_suite_missing})' ) non_failure = PLATFORM_TEST_NON_FAILURE_STATUS current_gate_ok_vector = ( @@ -3614,7 +3742,7 @@ def build_jobs_dashboard(): ) current_gate_health_by_suite = ( f"((100 * ({current_gate_ok_checks}) / clamp_min(({current_gate_seen_checks}), 1)) " - f"or on(suite) ({selected_suite_zero}))" + f"or on(suite) ({selected_suite_missing}))" ) success_history_runs = f'sum by (suite) (increase(platform_quality_gate_runs_total{{{runs_selector}}}[7d]))' success_history_by_suite = ( @@ -3744,9 +3872,9 @@ def build_jobs_dashboard(): f'count by (suite) (max_over_time(platform_quality_gate_build_info{{{build_info_selector}}}[30d]))' ) primary_branch_clean_by_suite = ( - f'sort_desc((100 * ((({branch_evidence_by_suite}) > bool 0) ' + f'(100 * ((({branch_evidence_by_suite}) > bool 0) ' f'unless on(suite) (({non_primary_branch_evidence}) > bool 0))) ' - f'or on(suite) (0 * (({branch_evidence_by_suite}) > bool 0)))' + f'or on(suite) (0 * (({branch_evidence_by_suite}) > bool 0))' ) def _missing_suite_series(presence_expr: str) -> str: @@ -3803,6 +3931,16 @@ def build_jobs_dashboard(): {"color": dark_red, "value": 5}, ], } + problematic_test_thresholds = { + "mode": "absolute", + "steps": [ + {"color": dark_blue, "value": None}, + {"color": dark_green, "value": 2}, + {"color": dark_yellow, "value": 3}, + {"color": dark_orange, "value": 5}, + {"color": dark_red, "value": 8}, + ], + } smell_thresholds = { "mode": "absolute", "steps": [ @@ -3908,9 +4046,12 @@ def build_jobs_dashboard(): decimals=2, ) ) + panels[-1]["fieldConfig"]["defaults"]["mappings"] = [ + {"type": "value", "options": {"-1": {"text": "missing"}}} + ] panels[-1]["description"] = ( "Current pass percentage across the required gate dimensions reported by each suite. " - "This is the fastest place to answer whether the latest suite quality signal is healthy." + "100% is clean; missing means the suite has not published current gate data." ) reliability_suite_panel = bargauge_panel( 9, @@ -4048,14 +4189,14 @@ def build_jobs_dashboard(): "Problematic Tests Over Time (Top failures)", problematic_tests_history, {"h": 8, "w": 12, "x": 0, "y": 57}, - thresholds=failures_thresholds, + thresholds=problematic_test_thresholds, unit="none", min_value=0, max_value=None, legend="{{suite}} - {{test}}", description=( - "Top failing test cases over time, using memoized hourly rollups. " - "Blank branch/test labels and placeholder no-test-case rows are excluded." + "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel " + "shows a larger total from earlier hours." ), ) ) @@ -4071,12 +4212,16 @@ def build_jobs_dashboard(): instant=True, legend="{{suite}} ยท {{test}}", sort_order="desc", - thresholds=failures_thresholds, + thresholds=problematic_test_thresholds, limit=9, links=jenkins_suite_links(), data_links=jenkins_latest_artifact_data_links(), ) ) + panels[-1]["description"] = ( + "Worst test per suite summed across 30d. This catches historical repeat offenders even when the " + "current hourly top list is quiet." + ) panels.append( timeseries_panel( 146, @@ -4127,8 +4272,8 @@ def build_jobs_dashboard(): thresholds=success_thresholds, legend="{{category}}", description=( - "Pass rate over time grouped by the test category label. Use the Suite filter to focus this " - "on one project; suites without category-aware publishers fall back to uncategorized." + "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one " + "project; no data means that suite has not published category-aware results yet." ), ) category_pass_rate_panel["links"] = jenkins_suite_links() @@ -4178,7 +4323,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4192,7 +4337,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4206,7 +4351,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4220,7 +4365,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4298,7 +4443,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4312,7 +4457,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, ) @@ -4341,7 +4486,7 @@ def build_jobs_dashboard(): unit="percent", instant=True, legend="{{suite}}", - sort_order="desc", + sort_order="asc", thresholds=success_thresholds, decimals=0, links=jenkins_suite_links(), @@ -4376,9 +4521,9 @@ def build_jobs_dashboard(): row_layout = { 11: {"h": 8, "w": 12, "x": 0, "y": 19}, 153: {"h": 8, "w": 12, "x": 12, "y": 19}, - 12: {"h": 8, "w": 12, "x": 0, "y": 27}, - 13: {"h": 8, "w": 6, "x": 12, "y": 27}, - 14: {"h": 8, "w": 6, "x": 18, "y": 27}, + 12: {"h": 8, "w": 8, "x": 0, "y": 27}, + 13: {"h": 8, "w": 8, "x": 8, "y": 27}, + 14: {"h": 8, "w": 8, "x": 16, "y": 27}, 145: {"h": 8, "w": 24, "x": 0, "y": 74}, 147: {"h": 8, "w": 8, "x": 0, "y": 83}, 146: {"h": 8, "w": 8, "x": 8, "y": 83}, @@ -4389,8 +4534,8 @@ def build_jobs_dashboard(): 30: {"h": 7, "w": 6, "x": 18, "y": 94}, 148: {"h": 7, "w": 6, "x": 0, "y": 101}, 151: {"h": 7, "w": 6, "x": 6, "y": 101}, - 149: {"h": 7, "w": 6, "x": 12, "y": 101}, - 150: {"h": 7, "w": 6, "x": 18, "y": 101}, + 150: {"h": 7, "w": 6, "x": 12, "y": 101}, + 149: {"h": 7, "w": 6, "x": 18, "y": 101}, 31: {"h": 6, "w": 4, "x": 0, "y": 111}, 32: {"h": 6, "w": 4, "x": 4, "y": 111}, 33: {"h": 6, "w": 4, "x": 8, "y": 111}, @@ -4425,7 +4570,7 @@ def build_jobs_dashboard(): 504, "Telemetry Completeness And Branches", 15, - panels=children([27, 28, 29, 30, 148, 151, 149, 150]), + panels=children([27, 28, 29, 30, 148, 151, 150, 149]), ), row_panel( 505, @@ -4437,6 +4582,7 @@ def build_jobs_dashboard(): ) panels = compact_panels set_bargauge_display_mode(panels, "basic") + apply_panel_descriptions(panels, TESTING_PANEL_DESCRIPTIONS) return { "uid": "atlas-jobs", diff --git a/scripts/tests/test_dashboards_render_atlas.py b/scripts/tests/test_dashboards_render_atlas.py index 8c70d3c7..fa52503e 100644 --- a/scripts/tests/test_dashboards_render_atlas.py +++ b/scripts/tests/test_dashboards_render_atlas.py @@ -142,6 +142,14 @@ def test_overview_uses_readable_quality_power_and_gitops_panels(): assert "kube_node_labels" not in gpu_expr +def test_overview_and_testing_panels_all_have_concise_descriptions(): + mod = load_module() + + for dashboard in [mod.build_overview(), mod.build_jobs_dashboard(), mod.build_testing_dashboard()]: + panels = flatten_panels(dashboard["panels"]) + assert all(panel.get("description") for panel in panels if panel["type"] != "row") + + def test_render_configmap_writes(tmp_path): mod = load_module() mod.DASHBOARD_DIR = tmp_path / "dash" @@ -213,6 +221,7 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability(): assert "platform_quality_gate_runs_total" in reliability_expr assert "> 0" in reliability_expr assert "- 1" in reliability_expr + assert reliability_expr.startswith("sort(") assert reliability_panel["fieldConfig"]["defaults"]["mappings"] == [ {"type": "value", "options": {"-1": {"text": "no runs"}}} ] @@ -224,12 +233,16 @@ def test_jobs_dashboard_separates_current_gate_health_from_reliability(): assert category_panel["type"] == "state-timeline" assert "category" in category_panel["targets"][0]["expr"] assert "Use the Suite filter" in category_panel["description"] + assert "category-aware results" in category_panel["description"] coverage_panel = panels_by_title["Coverage History by Suite"] loc_panel = panels_by_title["Files <=500 LOC History by Suite"] assert coverage_panel["type"] == "state-timeline" assert loc_panel["type"] == "state-timeline" assert coverage_panel["targets"][0]["expr"] != loc_panel["targets"][0]["expr"] + assert panels_by_title["Daily Run Volume (Selected Scope)"]["gridPos"] == {"h": 8, "w": 8, "x": 0, "y": 27} + assert coverage_panel["gridPos"] == {"h": 8, "w": 8, "x": 8, "y": 27} + assert loc_panel["gridPos"] == {"h": 8, "w": 8, "x": 16, "y": 27} run_volume_panel = panels_by_title["Daily Run Volume (Selected Scope)"] assert run_volume_panel["fieldConfig"]["defaults"]["custom"]["drawStyle"] == "bars" @@ -320,6 +333,14 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint(): assert problematic_panel["gridPos"]["w"] == 24 assert 'test!=""' in problematic_panel["targets"][0]["expr"] assert "vector(0)" not in problematic_panel["targets"][0]["expr"] + assert problematic_panel["fieldConfig"]["defaults"]["thresholds"]["steps"] == [ + {"color": "dark-blue", "value": None}, + {"color": "dark-green", "value": 2}, + {"color": "dark-yellow", "value": 3}, + {"color": "dark-orange", "value": 5}, + {"color": "dark-red", "value": 8}, + ] + assert "hourly bucket" in problematic_panel["description"] sonar_mix_panel = nested_panels_by_title["Sonar Gate Status Mix (Selected)"] sonar_health_panel = nested_panels_by_title["Sonar Gate Health by Project"] @@ -329,6 +350,19 @@ def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint(): assert "100 * max by (project_key)" in sonar_health_panel["targets"][0]["expr"] branch_panel = nested_panels_by_title["Primary Branch Clean by Suite (30d)"] + recent_branch_panel = nested_panels_by_title["Recent Branch Evidence by Suite (30d)"] + assert branch_panel["gridPos"]["x"] == 12 + assert recent_branch_panel["gridPos"]["x"] == 18 assert branch_panel["fieldConfig"]["defaults"]["unit"] == "percent" assert "unless on(suite)" in branch_panel["targets"][0]["expr"] assert "> bool 0" in branch_panel["targets"][0]["expr"] + assert branch_panel["targets"][0]["expr"].startswith("sort(") + + +def test_lesavka_jenkins_job_has_daily_refresh_trigger(): + casc = pathlib.Path("services/jenkins/configmap-jcasc.yaml").read_text() + lesavka_block = casc.split("pipelineJob('lesavka')", 1)[1].split("pipelineJob(", 1)[0] + + assert "scmpoll_spec('H/5 * * * *')" in lesavka_block + assert "cron" in lesavka_block + assert "spec('H H * * *')" in lesavka_block diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index 841f81cf..ed697afc 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -208,6 +208,9 @@ data: scmpoll_spec('H/5 * * * *') ignorePostCommitHooks(false) } + cron { + spec('H H * * *') + } } } } diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 2ce53b2d..6011f3d1 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -58,7 +58,8 @@ "orientation": "auto", "showThresholdMarkers": false, "showThresholdLabels": false - } + }, + "description": "Control-plane nodes currently Ready; full count is good, lower means Kubernetes core capacity is missing." }, { "id": 3, @@ -133,7 +134,8 @@ "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Non-core pods running on control-plane nodes; zero is good because control nodes should stay focused." }, { "id": 5, @@ -208,7 +210,8 @@ "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods that Kubernetes cannot finish deleting; zero is good, growth means cleanup or storage may be stuck." }, { "id": 27, @@ -358,7 +361,8 @@ "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods in unhealthy phases; zero is good, any count means a workload needs attention." }, { "id": 6, @@ -433,7 +437,8 @@ "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods restarting or unable to pull images; zero is good, any count usually blocks a service." }, { "id": 1, @@ -494,7 +499,8 @@ "orientation": "auto", "showThresholdMarkers": false, "showThresholdLabels": false - } + }, + "description": "Worker nodes currently Ready; full count is good, lower means less place to run services." }, { "id": 7, @@ -571,7 +577,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Highest worker CPU load right now; lower is calmer, hot nodes may need pods moved." }, { "id": 8, @@ -648,7 +655,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Highest worker memory use right now; lower is safer, high values risk evictions." }, { "id": 9, @@ -717,7 +725,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Busiest node network rate; spikes can reveal traffic concentration or noisy services." }, { "id": 10, @@ -786,7 +795,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Busiest node disk I/O rate; high values can explain slow storage-backed apps." }, { "id": 23, @@ -861,7 +871,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Percent of Astreae used; lower is safer, high values reduce storage headroom." }, { "id": 24, @@ -936,7 +947,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Percent of Asteria used; lower is safer, high values reduce storage headroom." }, { "id": 25, @@ -1003,7 +1015,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Free space on Astreae; higher is better for backups and workload growth." }, { "id": 26, @@ -1070,7 +1083,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Free space on Asteria; higher is better for backups and workload growth." }, { "id": 40, @@ -1168,7 +1182,8 @@ "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Live Pyrphoros UPS draw and runtime; stable runtime means the lab can ride out short outages." }, { "id": 144, @@ -1266,7 +1281,8 @@ "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Live Statera UPS draw and runtime; stable runtime means the lab can ride out short outages." }, { "id": 41, @@ -1354,7 +1370,8 @@ "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "UPS power draw over time; steady draw is normal, spikes show sudden load changes." }, { "id": 42, @@ -1452,7 +1469,8 @@ "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current tent temperature in C and F; moderate values protect hardware and plants." }, { "id": 143, @@ -1550,7 +1568,8 @@ "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current humidity and VPD; in-range values mean the enclosure climate is stable." }, { "id": 43, @@ -2175,7 +2194,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence." }, { "id": 152, @@ -2256,7 +2276,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy." }, { "id": 153, @@ -2337,7 +2358,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Suites with at least one run in 24h; higher is better because stale suites hide failures." }, { "id": 154, @@ -2422,7 +2444,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Average latest line coverage across suites; higher means code is better protected by tests." }, { "id": 155, @@ -2503,7 +2526,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Suites with no source files over 500 LOC; full count is good for maintainability." }, { "id": 150, @@ -2674,7 +2698,8 @@ "limit": 12 } } - ] + ], + "description": "Temporary job pods by age; low or empty is good, old pods usually need cleanup." }, { "id": 45, @@ -2764,7 +2789,8 @@ "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Ariadne automation attempts and failures; attempts show activity, failures show work to investigate." }, { "id": 46, @@ -3210,7 +3236,8 @@ "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail sent in the last day; useful context for mail health and bounce rates." }, { "id": 31, @@ -3315,7 +3342,8 @@ "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail bounce rate and count; zero is best, high values risk delivery reputation." }, { "id": 32, @@ -3391,7 +3419,8 @@ "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail success rate; higher is better for user notifications." }, { "id": 33, @@ -3467,7 +3496,8 @@ "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Postmark monthly send limit used; lower leaves more quota headroom." }, { "id": 34, @@ -3530,7 +3560,8 @@ "values": false }, "textMode": "name_and_value" - } + }, + "description": "Current Postgres connections; lower leaves room for apps during spikes." }, { "id": 35, @@ -3593,7 +3624,8 @@ "values": false }, "textMode": "name_and_value" - } + }, + "description": "Database with the most active connections; high values identify the pressure source." }, { "id": 11, @@ -3847,7 +3879,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Worker CPU over time; lower is calmer, sustained high load may need rescheduling." }, { "id": 15, @@ -3894,7 +3927,8 @@ "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Worker memory over time; lower is safer, sustained high use risks evictions." }, { "id": 16, @@ -3931,7 +3965,8 @@ "tooltip": { "mode": "multi" } - } + }, + "description": "Control-plane CPU over time; low steady usage means Kubernetes has control headroom." }, { "id": 17, @@ -3968,7 +4003,8 @@ "tooltip": { "mode": "multi" } - } + }, + "description": "Control-plane memory over time; low steady usage means Kubernetes has control headroom." }, { "id": 28, @@ -4019,7 +4055,8 @@ "fields": "", "values": false } - } + }, + "description": "Share of pods per node; uneven share can reveal overloaded workers." }, { "id": 29, @@ -4100,7 +4137,8 @@ "limit": 12 } } - ] + ], + "description": "Nodes with the most pods; lower and balanced is easier to operate." }, { "id": 18, @@ -4144,7 +4182,8 @@ "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic entering the cluster; spikes should line up with expected usage." }, { "id": 19, @@ -4188,7 +4227,8 @@ "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic leaving the cluster; spikes should line up with expected usage." }, { "id": 20, @@ -4232,7 +4272,8 @@ "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic inside the cluster; high values can expose chatty services." }, { "id": 21, @@ -4280,7 +4321,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Node root disk usage; lower is safer, high values can break kubelet." }, { "id": 22, @@ -4328,7 +4370,8 @@ "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Astraios disk fullness by node; lower is safer for storage reliability." } ], "schemaVersion": 39, diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index e4d79500..d189e240 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -76,7 +76,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Percent of selected CI runs that finished successfully in 24h; higher is better." }, { "id": 3, @@ -150,7 +151,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation." }, { "id": 4, @@ -223,7 +225,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look." }, { "id": 5, @@ -284,7 +287,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Selected CI run count in 24h; zero means the dashboard may be stale." }, { "id": 6, @@ -358,7 +362,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Average latest line coverage for selected suites; higher means better test protection." }, { "id": 7, @@ -431,7 +436,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Selected suites with oversized source files; zero is good for maintainability." }, { "id": 8, @@ -449,7 +455,7 @@ }, "targets": [ { - "expr": "sort(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"} > bool 0)))), 1)) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0)))))", + "expr": "sort(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"} > bool 0)))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -488,7 +494,17 @@ } ] }, - "decimals": 2 + "decimals": 2, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] }, "overrides": [] }, @@ -514,7 +530,7 @@ } } ], - "description": "Current pass percentage across the required gate dimensions reported by each suite. This is the fastest place to answer whether the latest suite quality signal is healthy." + "description": "Current pass percentage across the required gate dimensions reported by each suite. 100% is clean; missing means the suite has not published current gate data." }, { "id": 9, @@ -532,7 +548,7 @@ }, "targets": [ { - "expr": "sort_desc(((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))) > 0)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h])))) - 1))", + "expr": "sort(((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))) > 0)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -699,7 +715,8 @@ "order": "asc" } } - ] + ], + "description": "Latest suite coverage; 95%+ is acceptable and 100% is strongest." }, { "id": 18, @@ -886,7 +903,7 @@ "id": 153, "type": "state-timeline", "title": "Test Category Pass Rate History", - "description": "Pass rate over time grouped by the test category label. Use the Suite filter to focus this on one project; suites without category-aware publishers fall back to uncategorized.", + "description": "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one project; no data means that suite has not published category-aware results yet.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1075,7 +1092,7 @@ }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, "y": 27 }, @@ -1130,8 +1147,8 @@ }, "gridPos": { "h": 8, - "w": 6, - "x": 12, + "w": 8, + "x": 8, "y": 27 }, "targets": [ @@ -1206,8 +1223,8 @@ }, "gridPos": { "h": 8, - "w": 6, - "x": 18, + "w": 8, + "x": 16, "y": 27 }, "targets": [ @@ -1271,7 +1288,8 @@ } } } - ] + ], + "description": "Recent run, coverage, LOC, and category trends for selected suites." }, { "id": 501, @@ -1302,7 +1320,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1378,7 +1396,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1454,7 +1472,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1530,7 +1548,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1606,7 +1624,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1682,7 +1700,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1758,7 +1776,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1817,7 +1835,8 @@ } } } - ] + ], + "description": "Failure percent by check family; blue is zero failures, warmer colors show blockers." }, { "id": 502, @@ -1848,7 +1867,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1924,7 +1943,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2000,7 +2019,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2076,7 +2095,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2152,7 +2171,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2228,7 +2247,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2304,7 +2323,7 @@ }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2363,7 +2382,8 @@ } } } - ] + ], + "description": "Healthy percent by check family; blue means all selected checks are good." }, { "id": 503, @@ -2381,7 +2401,7 @@ "id": 145, "type": "state-timeline", "title": "Problematic Tests Over Time (Top failures)", - "description": "Top failing test cases over time, using memoized hourly rollups. Blank branch/test labels and placeholder no-test-case rows are excluded.", + "description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -2414,19 +2434,19 @@ }, { "color": "dark-green", - "value": 0.01 + "value": 2 }, { "color": "dark-yellow", - "value": 1 - }, - { - "color": "dark-orange", "value": 3 }, { - "color": "dark-red", + "color": "dark-orange", "value": 5 + }, + { + "color": "dark-red", + "value": 8 } ] }, @@ -2610,19 +2630,19 @@ }, { "color": "dark-green", - "value": 0.01 + "value": 2 }, { "color": "dark-yellow", - "value": 1 - }, - { - "color": "dark-orange", "value": 3 }, { - "color": "dark-red", + "color": "dark-orange", "value": 5 + }, + { + "color": "dark-red", + "value": 8 } ] }, @@ -2775,7 +2795,8 @@ "limit": 9 } } - ] + ], + "description": "Worst test per suite summed across 30d. This catches historical repeat offenders even when the current hourly top list is quiet." }, { "id": 146, @@ -3150,7 +3171,8 @@ } ] } - ] + ], + "description": "Test-case detail for finding which tests are hurting reliability." }, { "id": 504, @@ -3180,7 +3202,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3241,10 +3263,11 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether suite-level test counts are present; 100% means the suite is reporting." }, { "id": 28, @@ -3262,7 +3285,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3323,10 +3346,11 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether gate check metrics are present; 100% means health panels have inputs." }, { "id": 29, @@ -3344,7 +3368,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3405,10 +3429,11 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether coverage metrics are present; 100% means coverage panels are reliable." }, { "id": 30, @@ -3426,7 +3451,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"}) and on(suite) count by (suite) (platform_quality_gate_source_files_total{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"}) and on(suite) count by (suite) (platform_quality_gate_source_files_total{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3487,10 +3512,11 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether LOC metrics are present; 100% means size panels are reliable." }, { "id": 148, @@ -3508,7 +3534,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3569,10 +3595,11 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether per-test metrics are present; 100% enables drilldowns." }, { "id": 151, @@ -3590,7 +3617,7 @@ }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\",test!=\"__no_test_cases__\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\",test!=\"__no_test_cases__\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3651,10 +3678,201 @@ "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether real test names are present; 100% means not just placeholder telemetry." + }, + { + "id": 150, + "type": "bargauge", + "title": "Primary Branch Clean by Suite (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 101 + }, + "targets": [ + { + "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "basic", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "links": [ + { + "title": "Open Jenkins", + "url": "${jenkins_base}/", + "targetBlank": true + }, + { + "title": "ariadne: Job", + "url": "${jenkins_base}/job/ariadne/", + "targetBlank": true + }, + { + "title": "ariadne: Last Artifacts", + "url": "${jenkins_base}/job/ariadne/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "metis: Job", + "url": "${jenkins_base}/job/metis/", + "targetBlank": true + }, + { + "title": "metis: Last Artifacts", + "url": "${jenkins_base}/job/metis/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "ananke: Job", + "url": "${jenkins_base}/job/ananke/", + "targetBlank": true + }, + { + "title": "ananke: Last Artifacts", + "url": "${jenkins_base}/job/ananke/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "atlasbot: Job", + "url": "${jenkins_base}/job/atlasbot/", + "targetBlank": true + }, + { + "title": "atlasbot: Last Artifacts", + "url": "${jenkins_base}/job/atlasbot/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "lesavka: Job", + "url": "${jenkins_base}/job/lesavka/", + "targetBlank": true + }, + { + "title": "lesavka: Last Artifacts", + "url": "${jenkins_base}/job/lesavka/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "pegasus: Job", + "url": "${jenkins_base}/job/pegasus/", + "targetBlank": true + }, + { + "title": "pegasus: Last Artifacts", + "url": "${jenkins_base}/job/pegasus/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "soteria: Job", + "url": "${jenkins_base}/job/Soteria/", + "targetBlank": true + }, + { + "title": "soteria: Last Artifacts", + "url": "${jenkins_base}/job/Soteria/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "titan_iac: Job", + "url": "${jenkins_base}/job/titan-iac/", + "targetBlank": true + }, + { + "title": "titan_iac: Last Artifacts", + "url": "${jenkins_base}/job/titan-iac/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "bstein_home: Job", + "url": "${jenkins_base}/job/bstein-dev-home/", + "targetBlank": true + }, + { + "title": "bstein_home: Last Artifacts", + "url": "${jenkins_base}/job/bstein-dev-home/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "data_prepper: Job", + "url": "${jenkins_base}/job/data-prepper/", + "targetBlank": true + }, + { + "title": "data_prepper: Last Artifacts", + "url": "${jenkins_base}/job/data-prepper/lastCompletedBuild/artifact/", + "targetBlank": true + } + ], + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ], + "description": "Percent clean of non-primary branch evidence; 100% means only main/master is reporting." }, { "id": 149, @@ -3667,7 +3885,7 @@ "gridPos": { "h": 7, "w": 6, - "x": 12, + "x": 18, "y": 101 }, "targets": [ @@ -3831,198 +4049,11 @@ "order": "desc" } } - ] - }, - { - "id": 150, - "type": "bargauge", - "title": "Primary Branch Clean by Suite (30d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 101 - }, - "targets": [ - { - "expr": "sort_desc((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", - "refId": "A", - "legendFormat": "{{suite}}", - "instant": true - } ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "unit": "percent", - "min": 0, - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "dark-red", - "value": null - }, - { - "color": "dark-orange", - "value": 90 - }, - { - "color": "dark-yellow", - "value": 93 - }, - { - "color": "dark-green", - "value": 95 - }, - { - "color": "dark-blue", - "value": 100 - } - ] - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "displayMode": "basic", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "links": [ - { - "title": "Open Jenkins", - "url": "${jenkins_base}/", - "targetBlank": true - }, - { - "title": "ariadne: Job", - "url": "${jenkins_base}/job/ariadne/", - "targetBlank": true - }, - { - "title": "ariadne: Last Artifacts", - "url": "${jenkins_base}/job/ariadne/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "metis: Job", - "url": "${jenkins_base}/job/metis/", - "targetBlank": true - }, - { - "title": "metis: Last Artifacts", - "url": "${jenkins_base}/job/metis/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "ananke: Job", - "url": "${jenkins_base}/job/ananke/", - "targetBlank": true - }, - { - "title": "ananke: Last Artifacts", - "url": "${jenkins_base}/job/ananke/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "atlasbot: Job", - "url": "${jenkins_base}/job/atlasbot/", - "targetBlank": true - }, - { - "title": "atlasbot: Last Artifacts", - "url": "${jenkins_base}/job/atlasbot/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "lesavka: Job", - "url": "${jenkins_base}/job/lesavka/", - "targetBlank": true - }, - { - "title": "lesavka: Last Artifacts", - "url": "${jenkins_base}/job/lesavka/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "pegasus: Job", - "url": "${jenkins_base}/job/pegasus/", - "targetBlank": true - }, - { - "title": "pegasus: Last Artifacts", - "url": "${jenkins_base}/job/pegasus/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "soteria: Job", - "url": "${jenkins_base}/job/Soteria/", - "targetBlank": true - }, - { - "title": "soteria: Last Artifacts", - "url": "${jenkins_base}/job/Soteria/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "titan_iac: Job", - "url": "${jenkins_base}/job/titan-iac/", - "targetBlank": true - }, - { - "title": "titan_iac: Last Artifacts", - "url": "${jenkins_base}/job/titan-iac/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "bstein_home: Job", - "url": "${jenkins_base}/job/bstein-dev-home/", - "targetBlank": true - }, - { - "title": "bstein_home: Last Artifacts", - "url": "${jenkins_base}/job/bstein-dev-home/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "data_prepper: Job", - "url": "${jenkins_base}/job/data-prepper/", - "targetBlank": true - }, - { - "title": "data_prepper: Last Artifacts", - "url": "${jenkins_base}/job/data-prepper/lastCompletedBuild/artifact/", - "targetBlank": true - } - ], - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] + "description": "Branches with recent CI evidence; unexpected branches can mean drift or stale work." } - ] + ], + "description": "Checks that each suite publishes the data this dashboard needs." }, { "id": 505, @@ -4095,7 +4126,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Whether the SonarQube exporter can reach SonarQube; 1 is good." }, { "id": 32, @@ -4168,7 +4200,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Selected SonarQube project count; zero means Sonar is not tracking that suite." }, { "id": 33, @@ -4241,7 +4274,8 @@ "values": false }, "textMode": "value" - } + }, + "description": "Sonar exporter fetch errors; zero is good because stale Sonar data misleads." }, { "id": 34, @@ -4292,7 +4326,8 @@ "fields": "", "values": false } - } + }, + "description": "Mix of Sonar gate states; OK is good and non-OK needs cleanup." }, { "id": 35, @@ -4370,7 +4405,8 @@ } } } - ] + ], + "description": "SonarQube availability, projects, fetch errors, and gate status." } ], "time": { diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 34198f6b..0c764294 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -67,7 +67,8 @@ data: "orientation": "auto", "showThresholdMarkers": false, "showThresholdLabels": false - } + }, + "description": "Control-plane nodes currently Ready; full count is good, lower means Kubernetes core capacity is missing." }, { "id": 3, @@ -142,7 +143,8 @@ data: "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Non-core pods running on control-plane nodes; zero is good because control nodes should stay focused." }, { "id": 5, @@ -217,7 +219,8 @@ data: "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods that Kubernetes cannot finish deleting; zero is good, growth means cleanup or storage may be stuck." }, { "id": 27, @@ -367,7 +370,8 @@ data: "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods in unhealthy phases; zero is good, any count means a workload needs attention." }, { "id": 6, @@ -442,7 +446,8 @@ data: "url": "/d/atlas-pods", "targetBlank": true } - ] + ], + "description": "Pods restarting or unable to pull images; zero is good, any count usually blocks a service." }, { "id": 1, @@ -503,7 +508,8 @@ data: "orientation": "auto", "showThresholdMarkers": false, "showThresholdLabels": false - } + }, + "description": "Worker nodes currently Ready; full count is good, lower means less place to run services." }, { "id": 7, @@ -580,7 +586,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Highest worker CPU load right now; lower is calmer, hot nodes may need pods moved." }, { "id": 8, @@ -657,7 +664,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Highest worker memory use right now; lower is safer, high values risk evictions." }, { "id": 9, @@ -726,7 +734,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Busiest node network rate; spikes can reveal traffic concentration or noisy services." }, { "id": 10, @@ -795,7 +804,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Busiest node disk I/O rate; high values can explain slow storage-backed apps." }, { "id": 23, @@ -870,7 +880,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Percent of Astreae used; lower is safer, high values reduce storage headroom." }, { "id": 24, @@ -945,7 +956,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Percent of Asteria used; lower is safer, high values reduce storage headroom." }, { "id": 25, @@ -1012,7 +1024,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Free space on Astreae; higher is better for backups and workload growth." }, { "id": 26, @@ -1079,7 +1092,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Free space on Asteria; higher is better for backups and workload growth." }, { "id": 40, @@ -1177,7 +1191,8 @@ data: "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Live Pyrphoros UPS draw and runtime; stable runtime means the lab can ride out short outages." }, { "id": 144, @@ -1275,7 +1290,8 @@ data: "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Live Statera UPS draw and runtime; stable runtime means the lab can ride out short outages." }, { "id": 41, @@ -1363,7 +1379,8 @@ data: "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "UPS power draw over time; steady draw is normal, spikes show sudden load changes." }, { "id": 42, @@ -1461,7 +1478,8 @@ data: "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current tent temperature in C and F; moderate values protect hardware and plants." }, { "id": 143, @@ -1559,7 +1577,8 @@ data: "url": "/d/atlas-power", "targetBlank": true } - ] + ], + "description": "Current humidity and VPD; in-range values mean the enclosure climate is stable." }, { "id": 43, @@ -2184,7 +2203,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Percent of CI runs that completed successfully in 24h; higher is better for release confidence." }, { "id": 152, @@ -2265,7 +2285,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "CI runs that failed in 24h; zero is good, any value means recent test signal is noisy." }, { "id": 153, @@ -2346,7 +2367,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Suites with at least one run in 24h; higher is better because stale suites hide failures." }, { "id": 154, @@ -2431,7 +2453,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Average latest line coverage across suites; higher means code is better protected by tests." }, { "id": 155, @@ -2512,7 +2535,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Suites with no source files over 500 LOC; full count is good for maintainability." }, { "id": 150, @@ -2683,7 +2707,8 @@ data: "limit": 12 } } - ] + ], + "description": "Temporary job pods by age; low or empty is good, old pods usually need cleanup." }, { "id": 45, @@ -2773,7 +2798,8 @@ data: "url": "/d/atlas-testing", "targetBlank": true } - ] + ], + "description": "Ariadne automation attempts and failures; attempts show activity, failures show work to investigate." }, { "id": 46, @@ -3219,7 +3245,8 @@ data: "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail sent in the last day; useful context for mail health and bounce rates." }, { "id": 31, @@ -3324,7 +3351,8 @@ data: "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail bounce rate and count; zero is best, high values risk delivery reputation." }, { "id": 32, @@ -3400,7 +3428,8 @@ data: "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Outbound mail success rate; higher is better for user notifications." }, { "id": 33, @@ -3476,7 +3505,8 @@ data: "url": "/d/atlas-mail", "targetBlank": true } - ] + ], + "description": "Postmark monthly send limit used; lower leaves more quota headroom." }, { "id": 34, @@ -3539,7 +3569,8 @@ data: "values": false }, "textMode": "name_and_value" - } + }, + "description": "Current Postgres connections; lower leaves room for apps during spikes." }, { "id": 35, @@ -3602,7 +3633,8 @@ data: "values": false }, "textMode": "name_and_value" - } + }, + "description": "Database with the most active connections; high values identify the pressure source." }, { "id": 11, @@ -3856,7 +3888,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Worker CPU over time; lower is calmer, sustained high load may need rescheduling." }, { "id": 15, @@ -3903,7 +3936,8 @@ data: "url": "/d/atlas-nodes", "targetBlank": true } - ] + ], + "description": "Worker memory over time; lower is safer, sustained high use risks evictions." }, { "id": 16, @@ -3940,7 +3974,8 @@ data: "tooltip": { "mode": "multi" } - } + }, + "description": "Control-plane CPU over time; low steady usage means Kubernetes has control headroom." }, { "id": 17, @@ -3977,7 +4012,8 @@ data: "tooltip": { "mode": "multi" } - } + }, + "description": "Control-plane memory over time; low steady usage means Kubernetes has control headroom." }, { "id": 28, @@ -4028,7 +4064,8 @@ data: "fields": "", "values": false } - } + }, + "description": "Share of pods per node; uneven share can reveal overloaded workers." }, { "id": 29, @@ -4109,7 +4146,8 @@ data: "limit": 12 } } - ] + ], + "description": "Nodes with the most pods; lower and balanced is easier to operate." }, { "id": 18, @@ -4153,7 +4191,8 @@ data: "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic entering the cluster; spikes should line up with expected usage." }, { "id": 19, @@ -4197,7 +4236,8 @@ data: "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic leaving the cluster; spikes should line up with expected usage." }, { "id": 20, @@ -4241,7 +4281,8 @@ data: "url": "/d/atlas-network", "targetBlank": true } - ] + ], + "description": "Traffic inside the cluster; high values can expose chatty services." }, { "id": 21, @@ -4289,7 +4330,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Node root disk usage; lower is safer, high values can break kubelet." }, { "id": 22, @@ -4337,7 +4379,8 @@ data: "url": "/d/atlas-storage", "targetBlank": true } - ] + ], + "description": "Astraios disk fullness by node; lower is safer for storage reliability." } ], "schemaVersion": 39, diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index e01a86d1..d08c4baa 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -85,7 +85,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Percent of selected CI runs that finished successfully in 24h; higher is better." }, { "id": 3, @@ -159,7 +160,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Percent of selected CI runs that finished successfully in 30d; higher shows stable automation." }, { "id": 4, @@ -232,7 +234,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Selected CI runs that failed in 24h; zero is good and anything else needs a look." }, { "id": 5, @@ -293,7 +296,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Selected CI run count in 24h; zero means the dashboard may be stale." }, { "id": 6, @@ -367,7 +371,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Average latest line coverage for selected suites; higher means better test protection." }, { "id": 7, @@ -440,7 +445,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Selected suites with oversized source files; zero is good for maintainability." }, { "id": 8, @@ -458,7 +464,7 @@ data: }, "targets": [ { - "expr": "sort(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"} > bool 0)))), 1)) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0)))))", + "expr": "sort(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"} > bool 0)))), 1)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -497,7 +503,17 @@ data: } ] }, - "decimals": 2 + "decimals": 2, + "mappings": [ + { + "type": "value", + "options": { + "-1": { + "text": "missing" + } + } + } + ] }, "overrides": [] }, @@ -523,7 +539,7 @@ data: } } ], - "description": "Current pass percentage across the required gate dimensions reported by each suite. This is the fastest place to answer whether the latest suite quality signal is healthy." + "description": "Current pass percentage across the required gate dimensions reported by each suite. 100% is clean; missing means the suite has not published current gate data." }, { "id": 9, @@ -541,7 +557,7 @@ data: }, "targets": [ { - "expr": "sort_desc(((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))) > 0)) or on(suite) ((0 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h])))) - 1))", + "expr": "sort(((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[24h]))) > 0)) or on(suite) ((((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))) - 1)))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -708,7 +724,8 @@ data: "order": "asc" } } - ] + ], + "description": "Latest suite coverage; 95%+ is acceptable and 100% is strongest." }, { "id": 18, @@ -895,7 +912,7 @@ data: "id": 153, "type": "state-timeline", "title": "Test Category Pass Rate History", - "description": "Pass rate over time grouped by the test category label. Use the Suite filter to focus this on one project; suites without category-aware publishers fall back to uncategorized.", + "description": "Pass rate by test category from current per-test metrics. Use the Suite filter to focus one project; no data means that suite has not published category-aware results yet.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -1084,7 +1101,7 @@ data: }, "gridPos": { "h": 8, - "w": 12, + "w": 8, "x": 0, "y": 27 }, @@ -1139,8 +1156,8 @@ data: }, "gridPos": { "h": 8, - "w": 6, - "x": 12, + "w": 8, + "x": 8, "y": 27 }, "targets": [ @@ -1215,8 +1232,8 @@ data: }, "gridPos": { "h": 8, - "w": 6, - "x": 18, + "w": 8, + "x": 16, "y": 27 }, "targets": [ @@ -1280,7 +1297,8 @@ data: } } } - ] + ], + "description": "Recent run, coverage, LOC, and category trends for selected suites." }, { "id": 501, @@ -1311,7 +1329,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1387,7 +1405,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1463,7 +1481,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1539,7 +1557,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1615,7 +1633,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1691,7 +1709,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1767,7 +1785,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result!=\"\",result!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status!=\"\",status!~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1826,7 +1844,8 @@ data: } } } - ] + ], + "description": "Failure percent by check family; blue is zero failures, warmer colors show blockers." }, { "id": 502, @@ -1857,7 +1876,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"tests|unit|build\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -1933,7 +1952,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"coverage\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2009,7 +2028,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"loc|smell\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2085,7 +2104,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"docs|naming|hygiene|lint|docs_naming|style\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2161,7 +2180,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"gate|glue|gate_glue\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2237,7 +2256,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"sonarqube|sonar\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2313,7 +2332,7 @@ data: }, "targets": [ { - "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * (count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0))))", + "expr": "(((100 * (sum by (suite) (max by (suite, check) ((({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",result=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0) or ({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\",status=~\"ok|passed|success|not_applicable|skipped|na|n/a\"} > bool 0))))) / clamp_min((sum by (suite) (max by (suite, check) (({__name__=~\".*_quality_gate_checks_total\",suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\",check=~\"ironbank|supply_chain|image_compliance|artifact_security\"} > bool 0)))), 1))) or on(suite) ((0 * ((count by (suite) (platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}) >= bool 0) or (count by (suite) (max_over_time(platform_quality_gate_runs_total{suite=~\"${suite:regex}\",exported_job=\"platform-quality-ci\"}[30d])) >= bool 0)))))", "refId": "A", "legendFormat": "{{suite}}" } @@ -2372,7 +2391,8 @@ data: } } } - ] + ], + "description": "Healthy percent by check family; blue means all selected checks are good." }, { "id": 503, @@ -2390,7 +2410,7 @@ data: "id": 145, "type": "state-timeline", "title": "Problematic Tests Over Time (Top failures)", - "description": "Top failing test cases over time, using memoized hourly rollups. Blank branch/test labels and placeholder no-test-case rows are excluded.", + "description": "Top tests inside each hourly bucket. A test can show only 1-2 here while the 30d panel shows a larger total from earlier hours.", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -2423,19 +2443,19 @@ data: }, { "color": "dark-green", - "value": 0.01 + "value": 2 }, { "color": "dark-yellow", - "value": 1 - }, - { - "color": "dark-orange", "value": 3 }, { - "color": "dark-red", + "color": "dark-orange", "value": 5 + }, + { + "color": "dark-red", + "value": 8 } ] }, @@ -2619,19 +2639,19 @@ data: }, { "color": "dark-green", - "value": 0.01 + "value": 2 }, { "color": "dark-yellow", - "value": 1 - }, - { - "color": "dark-orange", "value": 3 }, { - "color": "dark-red", + "color": "dark-orange", "value": 5 + }, + { + "color": "dark-red", + "value": 8 } ] }, @@ -2784,7 +2804,8 @@ data: "limit": 9 } } - ] + ], + "description": "Worst test per suite summed across 30d. This catches historical repeat offenders even when the current hourly top list is quiet." }, { "id": 146, @@ -3159,7 +3180,8 @@ data: } ] } - ] + ], + "description": "Test-case detail for finding which tests are hurting reliability." }, { "id": 504, @@ -3189,7 +3211,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_tests_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3250,10 +3272,11 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether suite-level test counts are present; 100% means the suite is reporting." }, { "id": 28, @@ -3271,7 +3294,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) ({__name__=~\".*_quality_gate_checks_total\",exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3332,10 +3355,11 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether gate check metrics are present; 100% means health panels have inputs." }, { "id": 29, @@ -3353,7 +3377,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_workspace_line_coverage_percent{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3414,10 +3438,11 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether coverage metrics are present; 100% means coverage panels are reliable." }, { "id": 30, @@ -3435,7 +3460,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"}) and on(suite) count by (suite) (platform_quality_gate_source_files_total{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_source_lines_over_500_total{exported_job=\"platform-quality-ci\"}) and on(suite) count by (suite) (platform_quality_gate_source_files_total{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3496,10 +3521,11 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether LOC metrics are present; 100% means size panels are reliable." }, { "id": 148, @@ -3517,7 +3543,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3578,10 +3604,11 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether per-test metrics are present; 100% enables drilldowns." }, { "id": 151, @@ -3599,7 +3626,7 @@ data: }, "targets": [ { - "expr": "sort_desc((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\",test!=\"__no_test_cases__\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", + "expr": "sort((100 * (((label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\")) and on(suite) count by (suite) (platform_quality_gate_test_case_result{exported_job=\"platform-quality-ci\",test!=\"__no_test_cases__\"})))) or on(suite) (0 * (label_replace(vector(1), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"ananke\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"atlasbot\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"lesavka\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"pegasus\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"soteria\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"titan_iac\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"bstein_home\", \"__name__\", \".*\") or label_replace(vector(1), \"suite\", \"data_prepper\", \"__name__\", \".*\"))))", "refId": "A", "legendFormat": "{{suite}}", "instant": true @@ -3660,10 +3687,201 @@ data: "fields": [ "Value" ], - "order": "desc" + "order": "asc" } } - ] + ], + "description": "Whether real test names are present; 100% means not just placeholder telemetry." + }, + { + "id": 150, + "type": "bargauge", + "title": "Primary Branch Clean by Suite (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 101 + }, + "targets": [ + { + "expr": "sort((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", + "refId": "A", + "legendFormat": "{{suite}}", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "unit": "percent", + "min": 0, + "max": 100, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 90 + }, + { + "color": "dark-yellow", + "value": 93 + }, + { + "color": "dark-green", + "value": 95 + }, + { + "color": "dark-blue", + "value": 100 + } + ] + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "displayMode": "basic", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + }, + "links": [ + { + "title": "Open Jenkins", + "url": "${jenkins_base}/", + "targetBlank": true + }, + { + "title": "ariadne: Job", + "url": "${jenkins_base}/job/ariadne/", + "targetBlank": true + }, + { + "title": "ariadne: Last Artifacts", + "url": "${jenkins_base}/job/ariadne/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "metis: Job", + "url": "${jenkins_base}/job/metis/", + "targetBlank": true + }, + { + "title": "metis: Last Artifacts", + "url": "${jenkins_base}/job/metis/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "ananke: Job", + "url": "${jenkins_base}/job/ananke/", + "targetBlank": true + }, + { + "title": "ananke: Last Artifacts", + "url": "${jenkins_base}/job/ananke/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "atlasbot: Job", + "url": "${jenkins_base}/job/atlasbot/", + "targetBlank": true + }, + { + "title": "atlasbot: Last Artifacts", + "url": "${jenkins_base}/job/atlasbot/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "lesavka: Job", + "url": "${jenkins_base}/job/lesavka/", + "targetBlank": true + }, + { + "title": "lesavka: Last Artifacts", + "url": "${jenkins_base}/job/lesavka/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "pegasus: Job", + "url": "${jenkins_base}/job/pegasus/", + "targetBlank": true + }, + { + "title": "pegasus: Last Artifacts", + "url": "${jenkins_base}/job/pegasus/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "soteria: Job", + "url": "${jenkins_base}/job/Soteria/", + "targetBlank": true + }, + { + "title": "soteria: Last Artifacts", + "url": "${jenkins_base}/job/Soteria/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "titan_iac: Job", + "url": "${jenkins_base}/job/titan-iac/", + "targetBlank": true + }, + { + "title": "titan_iac: Last Artifacts", + "url": "${jenkins_base}/job/titan-iac/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "bstein_home: Job", + "url": "${jenkins_base}/job/bstein-dev-home/", + "targetBlank": true + }, + { + "title": "bstein_home: Last Artifacts", + "url": "${jenkins_base}/job/bstein-dev-home/lastCompletedBuild/artifact/", + "targetBlank": true + }, + { + "title": "data_prepper: Job", + "url": "${jenkins_base}/job/data-prepper/", + "targetBlank": true + }, + { + "title": "data_prepper: Last Artifacts", + "url": "${jenkins_base}/job/data-prepper/lastCompletedBuild/artifact/", + "targetBlank": true + } + ], + "transformations": [ + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "asc" + } + } + ], + "description": "Percent clean of non-primary branch evidence; 100% means only main/master is reporting." }, { "id": 149, @@ -3676,7 +3894,7 @@ data: "gridPos": { "h": 7, "w": 6, - "x": 12, + "x": 18, "y": 101 }, "targets": [ @@ -3840,198 +4058,11 @@ data: "order": "desc" } } - ] - }, - { - "id": 150, - "type": "bargauge", - "title": "Primary Branch Clean by Suite (30d)", - "datasource": { - "type": "prometheus", - "uid": "atlas-vm" - }, - "gridPos": { - "h": 7, - "w": 6, - "x": 18, - "y": 101 - }, - "targets": [ - { - "expr": "sort_desc((100 * (((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0) unless on(suite) ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\",branch!~\"main|master|origin/main|origin/master|unknown\"}[30d]))) > bool 0))) or on(suite) (0 * ((count by (suite) (max_over_time(platform_quality_gate_build_info{suite=~\"${suite:regex}\",branch!=\"\",branch=~\"${branch:regex}\",exported_job=\"platform-quality-ci\"}[30d]))) > bool 0)))", - "refId": "A", - "legendFormat": "{{suite}}", - "instant": true - } ], - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "unit": "percent", - "min": 0, - "max": 100, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "dark-red", - "value": null - }, - { - "color": "dark-orange", - "value": 90 - }, - { - "color": "dark-yellow", - "value": 93 - }, - { - "color": "dark-green", - "value": 95 - }, - { - "color": "dark-blue", - "value": 100 - } - ] - }, - "decimals": 0 - }, - "overrides": [] - }, - "options": { - "displayMode": "basic", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - } - }, - "links": [ - { - "title": "Open Jenkins", - "url": "${jenkins_base}/", - "targetBlank": true - }, - { - "title": "ariadne: Job", - "url": "${jenkins_base}/job/ariadne/", - "targetBlank": true - }, - { - "title": "ariadne: Last Artifacts", - "url": "${jenkins_base}/job/ariadne/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "metis: Job", - "url": "${jenkins_base}/job/metis/", - "targetBlank": true - }, - { - "title": "metis: Last Artifacts", - "url": "${jenkins_base}/job/metis/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "ananke: Job", - "url": "${jenkins_base}/job/ananke/", - "targetBlank": true - }, - { - "title": "ananke: Last Artifacts", - "url": "${jenkins_base}/job/ananke/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "atlasbot: Job", - "url": "${jenkins_base}/job/atlasbot/", - "targetBlank": true - }, - { - "title": "atlasbot: Last Artifacts", - "url": "${jenkins_base}/job/atlasbot/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "lesavka: Job", - "url": "${jenkins_base}/job/lesavka/", - "targetBlank": true - }, - { - "title": "lesavka: Last Artifacts", - "url": "${jenkins_base}/job/lesavka/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "pegasus: Job", - "url": "${jenkins_base}/job/pegasus/", - "targetBlank": true - }, - { - "title": "pegasus: Last Artifacts", - "url": "${jenkins_base}/job/pegasus/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "soteria: Job", - "url": "${jenkins_base}/job/Soteria/", - "targetBlank": true - }, - { - "title": "soteria: Last Artifacts", - "url": "${jenkins_base}/job/Soteria/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "titan_iac: Job", - "url": "${jenkins_base}/job/titan-iac/", - "targetBlank": true - }, - { - "title": "titan_iac: Last Artifacts", - "url": "${jenkins_base}/job/titan-iac/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "bstein_home: Job", - "url": "${jenkins_base}/job/bstein-dev-home/", - "targetBlank": true - }, - { - "title": "bstein_home: Last Artifacts", - "url": "${jenkins_base}/job/bstein-dev-home/lastCompletedBuild/artifact/", - "targetBlank": true - }, - { - "title": "data_prepper: Job", - "url": "${jenkins_base}/job/data-prepper/", - "targetBlank": true - }, - { - "title": "data_prepper: Last Artifacts", - "url": "${jenkins_base}/job/data-prepper/lastCompletedBuild/artifact/", - "targetBlank": true - } - ], - "transformations": [ - { - "id": "sortBy", - "options": { - "fields": [ - "Value" - ], - "order": "desc" - } - } - ] + "description": "Branches with recent CI evidence; unexpected branches can mean drift or stale work." } - ] + ], + "description": "Checks that each suite publishes the data this dashboard needs." }, { "id": 505, @@ -4104,7 +4135,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Whether the SonarQube exporter can reach SonarQube; 1 is good." }, { "id": 32, @@ -4177,7 +4209,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Selected SonarQube project count; zero means Sonar is not tracking that suite." }, { "id": 33, @@ -4250,7 +4283,8 @@ data: "values": false }, "textMode": "value" - } + }, + "description": "Sonar exporter fetch errors; zero is good because stale Sonar data misleads." }, { "id": 34, @@ -4301,7 +4335,8 @@ data: "fields": "", "values": false } - } + }, + "description": "Mix of Sonar gate states; OK is good and non-OK needs cleanup." }, { "id": 35, @@ -4379,7 +4414,8 @@ data: } } } - ] + ], + "description": "SonarQube availability, projects, fetch errors, and gate status." } ], "time": {