monitoring(jobs): split testing dashboard and clean up job ops view

This commit is contained in:
Brad Stein 2026-04-12 20:05:39 -03:00
parent 049a0deb04
commit 299a68ad95
8 changed files with 3488 additions and 2253 deletions

View File

@ -428,13 +428,59 @@ ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
# PromQL for the Ariadne schedule panels. Every selector below is narrowed by
# ARIADNE_SCHEDULE_TASK_FILTER; the tripled braces in the f-strings render as a
# literal "{" + filter + "}" label matcher.

# Hours between now and the newest last-error timestamp observed inside the
# dashboard's selected range ($__range).
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
f"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600"
)
# Hours from now until the exported next-run timestamp (negative once that
# timestamp lies in the past).
ARIADNE_SCHEDULE_NEXT_RUN_HOURS = (
f"((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}} - time()) / 3600)"
)
# Raw last-status gauge; the status panel maps 1 to "ok" and 0 to "error".
ARIADNE_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
# Number of last-success series currently exported; `or on() vector(0)` makes
# the query return 0 instead of no data when none exist.
ARIADNE_SCHEDULE_SIGNAL_COUNT = (
f"count(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) or on() vector(0)"
)
# Staleness cutoff in seconds (36 hours).
ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600
# Count of series whose last success is older than the stale window. `> bool`
# yields 0/1 per series so that sum() acts as a counter; falls back to 0.
ARIADNE_SCHEDULE_STALE_COUNT = (
f"sum(((time() - ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC}))"
" or on() vector(0)"
)
# Count of tasks that export a next-run timestamp but have no last-success
# series with the same `task` label; falls back to 0.
ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT = (
f"count((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}} unless on(task) "
f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}})) or on() vector(0)"
)
# Count of series whose last status is below 1 (i.e. 1 - status > 0); falls
# back to 0.
ARIADNE_SCHEDULE_FAILED_LAST_COUNT = (
f"sum(((1 - ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) > bool 0)) or on() vector(0)"
)
# Per-task increase of the run counter over the selected dashboard range.
ARIADNE_SCHEDULE_RUNS_RANGE = (
f'sum by (task) (increase(ariadne_task_runs_total{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))'
)
# Same as above, restricted to runs labelled status="error".
ARIADNE_SCHEDULE_ERRORS_RANGE = (
f'sum by (task) (increase(ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))'
)
# PromQL for the Jenkins workspace cleanup panels.

# Number of last-run timestamp series exported; 0 when the metric is absent.
JENKINS_CLEANUP_SIGNAL_COUNT = (
"count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) or on() vector(0)"
)
# Increase of the cleanup run counter over the dashboard range, split by the
# `mode` and `status` labels.
JENKINS_CLEANUP_RUNS_RANGE = (
"sum by (mode, status) (increase(ariadne_jenkins_workspace_cleanup_runs_total[$__range]))"
)
# Increase of the cleanup object counter over the dashboard range, split by the
# `kind`, `action` and `mode` labels.
JENKINS_CLEANUP_OBJECTS_RANGE = (
"sum by (kind, action, mode) (increase(ariadne_jenkins_workspace_cleanup_objects_total[$__range]))"
)
# Hours since the last cleanup run. 999 is a sentinel returned when the
# timestamp metric is absent, so the panel shows a large age rather than no data.
JENKINS_CLEANUP_LAST_RUN_AGE_HOURS = (
"((time() - ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) / 3600) or on() vector(999)"
)
# Hours since the last successful cleanup run, with the same 999 sentinel.
JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS = (
"((time() - ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds) / 3600) or on() vector(999)"
)
# Last-run deleted / planned totals as exported; both fall back to 0 when the
# metric is absent.
JENKINS_CLEANUP_LAST_DELETED = "ariadne_jenkins_workspace_cleanup_last_deleted_total or on() vector(0)"
JENKINS_CLEANUP_LAST_PLANNED = "ariadne_jenkins_workspace_cleanup_last_planned_total or on() vector(0)"
# Count of PVs in phase Released or Failed whose claim reference points at a
# claim in the "jenkins" namespace named pvc-workspace-*.
# `> bool 0` turns each phase series into 0/1, the join on `persistentvolume`
# keeps only PVs with a matching claim ref, and sum() adds the 1s. Falls back
# to 0 when nothing matches.
JENKINS_WORKSPACE_PV_STALE_COUNT = (
'sum((kube_persistentvolume_status_phase{phase=~"Released|Failed"} > bool 0) '
'* on(persistentvolume) group_left(claim_namespace,name) '
'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"}) or on() vector(0)'
)
# Age in hours of detached (Released or Failed) Jenkins workspace PVs, one
# series per PV, labelled with the claim's namespace and name.
#
# Steps:
#   1. (now - PV creation time) in hours.
#   2. Join to the claim ref so only PVs claimed by jenkins/pvc-workspace-*
#      remain; group_left copies `claim_namespace` and `name` onto the result.
#   3. Join to the phase metric to keep only PVs that are currently detached.
#
# Step 3 needs exactly one right-hand series per PV. The selector matches two
# phases, and (assuming the exporter publishes one series per PV per phase, as
# kube-state-metrics does -- TODO confirm against the deployed version) that
# gives two candidates per PV, which a `group_left()` join rejects as duplicate
# series. `max by (persistentvolume)` collapses them to a single series.
# A plain `> 0` filter (not `> bool 0`) drops inactive phases entirely, so
# bound PVs disappear from the result rather than showing up with an age of 0.
JENKINS_WORKSPACE_PV_STALE_AGE_HOURS = (
    '((time() - kube_persistentvolume_created) / 3600) '
    '* on(persistentvolume) group_left(claim_namespace,name) '
    'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"} '
    '* on(persistentvolume) group_left() '
    'max by (persistentvolume) (kube_persistentvolume_status_phase{phase=~"Released|Failed"} > 0)'
)
# Bare metric selector (no filter or aggregation) for Ariadne access requests.
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
PLATFORM_TEST_SUITE_NAMES = [
"ariadne",
@ -1603,7 +1649,7 @@ def build_overview():
legend_display="table",
legend_placement="right",
legend_calcs=["lastNotNull"],
links=link_to("atlas-jobs"),
links=link_to("atlas-testing"),
)
test_success["fieldConfig"]["defaults"]["min"] = 0
test_success["fieldConfig"]["defaults"]["max"] = 100
@ -2903,6 +2949,15 @@ def build_jobs_dashboard():
{"color": "red", "value": 48},
],
}
old_age_thresholds = {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 24},
{"color": "orange", "value": 72},
{"color": "red", "value": 168},
],
}
recent_error_thresholds = {
"mode": "absolute",
"steps": [
@ -2912,7 +2967,6 @@ def build_jobs_dashboard():
{"color": "green", "value": 24},
],
}
task_error_thresholds = {
"mode": "absolute",
"steps": [
@ -2922,6 +2976,15 @@ def build_jobs_dashboard():
{"color": "red", "value": 5},
],
}
count_thresholds = {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 2},
{"color": "red", "value": 3},
],
}
schedule_status_thresholds = {
"mode": "absolute",
"steps": [
@ -2932,24 +2995,162 @@ def build_jobs_dashboard():
}
panels.append(
bargauge_panel(
stat_panel(
1,
"Ariadne Task Errors (range)",
ARIADNE_TASK_ERRORS_RANGE,
{"h": 7, "w": 8, "x": 0, "y": 0},
"Schedule Metrics Exported",
ARIADNE_SCHEDULE_SIGNAL_COUNT,
{"h": 4, "w": 4, "x": 0, "y": 0},
unit="none",
instant=True,
legend="{{task}}",
thresholds=task_error_thresholds,
thresholds={
"mode": "absolute",
"steps": [
{"color": "red", "value": None},
{"color": "green", "value": 1},
],
},
)
)
panels.append(
stat_panel(
2,
"Schedule Tasks Stale (>36h)",
ARIADNE_SCHEDULE_STALE_COUNT,
{"h": 4, "w": 4, "x": 4, "y": 0},
unit="none",
thresholds=count_thresholds,
)
)
panels.append(
stat_panel(
3,
"Schedule Tasks Missing Success",
ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT,
{"h": 4, "w": 4, "x": 8, "y": 0},
unit="none",
thresholds=count_thresholds,
)
)
panels.append(
stat_panel(
4,
"Schedule Tasks Failed Last Run",
ARIADNE_SCHEDULE_FAILED_LAST_COUNT,
{"h": 4, "w": 4, "x": 12, "y": 0},
unit="none",
thresholds=count_thresholds,
)
)
panels.append(
stat_panel(
5,
"Glue Jobs Stale (>36h)",
GLUE_STALE_COUNT,
{"h": 4, "w": 4, "x": 16, "y": 0},
unit="none",
thresholds=count_thresholds,
)
)
panels.append(
stat_panel(
6,
"Jenkins Workspace PV Backlog",
JENKINS_WORKSPACE_PV_STALE_COUNT,
{"h": 4, "w": 4, "x": 20, "y": 0},
unit="none",
thresholds={
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 10},
{"color": "red", "value": 25},
],
},
)
)
schedule_list_panel = table_panel(
7,
"Ariadne Schedules: Last Success (h, newest first)",
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
{"h": 8, "w": 12, "x": 0, "y": 4},
unit="h",
transformations=[
{"id": "labelsToFields", "options": {}},
{"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}},
],
instant=True,
)
schedule_list_panel["description"] = "Primary schedule inventory ordered by recency so fresh jobs stay at the top."
panels.append(schedule_list_panel)
panels.append(
bargauge_panel(
8,
"Ariadne Schedule Last Error (hours ago)",
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
{"h": 8, "w": 12, "x": 12, "y": 4},
unit="h",
instant=True,
legend="{{task}}",
sort_order="asc",
thresholds=recent_error_thresholds,
decimals=2,
)
)
status_panel = bargauge_panel(
9,
"Ariadne Schedule Last Status",
ARIADNE_SCHEDULE_LAST_STATUS,
{"h": 8, "w": 8, "x": 0, "y": 12},
unit="none",
instant=True,
legend="{{task}}",
sort_order="asc",
thresholds=schedule_status_thresholds,
decimals=0,
)
status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error."
status_panel["fieldConfig"]["defaults"]["mappings"] = [
{
"id": 2,
"type": "value",
"options": {
"0": {"text": "error"},
"1": {"text": "ok"},
},
}
]
panels.append(status_panel)
schedule_runs_panel = bargauge_panel(
10,
"Ariadne Schedule Runs (range)",
ARIADNE_SCHEDULE_RUNS_RANGE,
{"h": 8, "w": 8, "x": 8, "y": 12},
unit="none",
instant=True,
legend="{{task}}",
thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]},
)
schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range."
panels.append(schedule_runs_panel)
schedule_errors_panel = bargauge_panel(
11,
"Ariadne Schedule Errors (range)",
ARIADNE_SCHEDULE_ERRORS_RANGE,
{"h": 8, "w": 8, "x": 16, "y": 12},
unit="none",
instant=True,
legend="{{task}}",
thresholds=task_error_thresholds,
)
schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range."
panels.append(schedule_errors_panel)
panels.append(
{
"id": 12,
"type": "timeseries",
"title": "Ariadne Attempts / Failures",
"datasource": PROM_DS,
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
"gridPos": {"h": 7, "w": 12, "x": 0, "y": 20},
"targets": [
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
@ -2979,10 +3180,10 @@ def build_jobs_dashboard():
)
panels.append(
bargauge_panel(
3,
13,
"One-off Job Pods (age hours)",
ONEOFF_JOB_POD_AGE_HOURS,
{"h": 7, "w": 8, "x": 16, "y": 0},
{"h": 7, "w": 12, "x": 12, "y": 20},
unit="h",
instant=True,
legend="{{namespace}}/{{pod}}",
@ -2991,186 +3192,251 @@ def build_jobs_dashboard():
decimals=2,
)
)
panels.append(
bargauge_panel(
14,
"Glue Jobs Last Success (hours ago)",
GLUE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 27},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
15,
"Glue Jobs Last Schedule (hours ago)",
GLUE_LAST_SCHEDULE_RANGE_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 27},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
stat_panel(
4,
"Glue Jobs Stale (>36h)",
GLUE_STALE_COUNT,
{"h": 4, "w": 4, "x": 0, "y": 7},
16,
"Jenkins Cleanup Signal Present",
JENKINS_CLEANUP_SIGNAL_COUNT,
{"h": 4, "w": 4, "x": 0, "y": 33},
unit="none",
instant=True,
thresholds={
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 2},
{"color": "red", "value": 3},
{"color": "red", "value": None},
{"color": "green", "value": 1},
],
},
)
)
panels.append(
stat_panel(
5,
"Glue Jobs Missing Success",
GLUE_MISSING_COUNT,
{"h": 4, "w": 4, "x": 4, "y": 7},
unit="none",
)
)
panels.append(
stat_panel(
6,
"Glue Jobs Suspended",
GLUE_SUSPENDED_COUNT,
{"h": 4, "w": 4, "x": 8, "y": 7},
unit="none",
)
)
panels.append(
stat_panel(
7,
"Ariadne Task Errors (1h)",
ARIADNE_TASK_ERRORS_1H_TOTAL,
{"h": 4, "w": 4, "x": 12, "y": 7},
unit="none",
)
)
panels.append(
stat_panel(
8,
"Ariadne Task Errors (24h)",
ARIADNE_TASK_ERRORS_24H_TOTAL,
{"h": 4, "w": 4, "x": 16, "y": 7},
unit="none",
)
)
panels.append(
stat_panel(
9,
"Ariadne Task Runs (1h)",
ARIADNE_TASK_RUNS_1H_TOTAL,
{"h": 4, "w": 4, "x": 20, "y": 7},
unit="none",
)
)
panels.append(
bargauge_panel(
10,
"Ariadne Schedule Last Error (hours ago)",
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 17},
17,
"Jenkins Cleanup Last Run Age (h)",
JENKINS_CLEANUP_LAST_RUN_AGE_HOURS,
{"h": 4, "w": 4, "x": 4, "y": 33},
unit="h",
instant=True,
legend="{{task}}",
thresholds=recent_error_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
11,
"Ariadne Schedule Last Success (hours ago, newest first)",
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 17},
unit="h",
instant=True,
legend="{{task}}",
sort_order="asc",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
12,
"Glue Jobs Last Success (hours ago)",
GLUE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 23},
stat_panel(
18,
"Jenkins Cleanup Last Success Age (h)",
JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS,
{"h": 4, "w": 4, "x": 8, "y": 33},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
instant=True,
thresholds=age_thresholds,
)
)
panels.append(
bargauge_panel(
13,
"Glue Jobs Last Schedule (hours ago)",
GLUE_LAST_SCHEDULE_RANGE_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 23},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
thresholds=age_thresholds,
decimals=2,
)
)
panels.append(
bargauge_panel(
14,
"Ariadne Task Errors (1h)",
ARIADNE_TASK_ERRORS_1H,
{"h": 6, "w": 12, "x": 0, "y": 29},
stat_panel(
19,
"Jenkins Cleanup Planned (last run)",
JENKINS_CLEANUP_LAST_PLANNED,
{"h": 4, "w": 4, "x": 12, "y": 33},
unit="none",
instant=True,
legend="{{task}}",
thresholds=task_error_thresholds,
)
)
panels.append(
bargauge_panel(
15,
"Ariadne Task Errors (30d)",
ARIADNE_TASK_ERRORS_30D,
{"h": 6, "w": 12, "x": 12, "y": 29},
stat_panel(
20,
"Jenkins Cleanup Deleted (last run)",
JENKINS_CLEANUP_LAST_DELETED,
{"h": 4, "w": 4, "x": 16, "y": 33},
unit="none",
instant=True,
legend="{{task}}",
thresholds=task_error_thresholds,
)
)
panels.append(
bargauge_panel(
16,
stat_panel(
21,
"Ariadne Access Requests",
ARIADNE_ACCESS_REQUESTS,
{"h": 6, "w": 8, "x": 0, "y": 11},
{"h": 4, "w": 4, "x": 20, "y": 33},
unit="none",
instant=True,
legend="{{status}}",
)
)
coverage_panel = stat_panel(
17,
panels.append(
timeseries_panel(
22,
"Jenkins Cleanup Runs (range)",
None,
{"h": 7, "w": 12, "x": 0, "y": 37},
unit="none",
targets=[
{"refId": "A", "expr": JENKINS_CLEANUP_RUNS_RANGE, "legendFormat": "{{mode}}/{{status}}"},
],
legend_display="table",
legend_placement="right",
)
)
panels.append(
timeseries_panel(
23,
"Jenkins Cleanup Objects (range)",
None,
{"h": 7, "w": 12, "x": 12, "y": 37},
unit="none",
targets=[
{"refId": "A", "expr": JENKINS_CLEANUP_OBJECTS_RANGE, "legendFormat": "{{kind}}/{{action}}/{{mode}}"},
],
legend_display="table",
legend_placement="right",
)
)
stale_volume_panel = bargauge_panel(
24,
"Jenkins Workspace PV Age (h, detached only)",
JENKINS_WORKSPACE_PV_STALE_AGE_HOURS,
{"h": 10, "w": 24, "x": 0, "y": 44},
unit="h",
instant=True,
legend="{{name}} -> {{persistentvolume}}",
thresholds=old_age_thresholds,
decimals=1,
limit=40,
)
stale_volume_panel["description"] = (
"Oldest detached Jenkins workspace volumes first. This is the direct cleanup backlog view."
)
panels.append(stale_volume_panel)
return {
"uid": "atlas-jobs",
"title": "Atlas Jobs",
"folderUid": PRIVATE_FOLDER,
"editable": True,
"panels": panels,
"time": {"from": "now-7d", "to": "now"},
"annotations": {"list": []},
"schemaVersion": 39,
"style": "dark",
"tags": ["atlas", "jobs", "glue"],
}
def build_testing_dashboard():
panels = []
pass_rate_thresholds = {
"mode": "absolute",
"steps": [
{"color": "red", "value": None},
{"color": "orange", "value": 80},
{"color": "yellow", "value": 95},
{"color": "green", "value": 99},
],
}
failures_thresholds = {
"mode": "absolute",
"steps": [
{"color": "green", "value": None},
{"color": "yellow", "value": 1},
{"color": "orange", "value": 3},
{"color": "red", "value": 5},
],
}
pass_rate_panel = stat_panel(
1,
"Platform Test Success Rate (30d)",
TEST_SUCCESS_RATE,
{"h": 6, "w": 4, "x": 8, "y": 11},
{"h": 4, "w": 6, "x": 0, "y": 0},
unit="percent",
decimals=2,
instant=True,
thresholds=pass_rate_thresholds,
)
coverage_panel["description"] = "Internal rollup across Ariadne task runs and Metis build/flash outcomes."
panels.append(coverage_panel)
tests_panel = table_panel(
18,
pass_rate_panel["description"] = "Overall success rate across tracked suites over the last 30 days."
panels.append(pass_rate_panel)
failures_panel = stat_panel(
2,
"Platform Test Failures (24h)",
TEST_FAILURES_24H_TOTAL,
{"h": 4, "w": 6, "x": 6, "y": 0},
unit="none",
instant=True,
thresholds=failures_thresholds,
)
failures_panel["description"] = "Total failed runs in the last 24 hours."
panels.append(failures_panel)
activity_panel = table_panel(
3,
"Platform Test Activity (30d)",
PLATFORM_TEST_ACTIVITY_30D,
{"h": 6, "w": 12, "x": 12, "y": 11},
{"h": 8, "w": 12, "x": 12, "y": 0},
unit="none",
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
transformations=[
{"id": "labelsToFields", "options": {}},
{"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
],
instant=True,
)
tests_panel["description"] = (
"Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters."
activity_panel["description"] = "Suite/status event counts over 30 days."
panels.append(activity_panel)
panels.append(
bargauge_panel(
4,
"Platform Test Failures by Suite (24h)",
PLATFORM_TEST_FAILURES_24H_BY_SUITE,
{"h": 8, "w": 12, "x": 0, "y": 8},
unit="none",
instant=True,
legend="{{suite}}",
thresholds=failures_thresholds,
)
)
panels.append(
bargauge_panel(
5,
"Platform Test Success Rate by Suite (24h, lowest first)",
PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE,
{"h": 8, "w": 12, "x": 12, "y": 8},
unit="percent",
instant=True,
legend="{{suite}}",
sort_order="asc",
thresholds=pass_rate_thresholds,
decimals=2,
)
)
panels.append(tests_panel)
suite_panel = timeseries_panel(
19,
6,
"Platform Test Success Rate by Suite",
None,
{"h": 6, "w": 16, "x": 8, "y": 17},
{"h": 8, "w": 24, "x": 0, "y": 16},
unit="percent",
targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS,
legend_display="list",
@ -3187,69 +3453,20 @@ def build_jobs_dashboard():
"pointSize": 4,
"spanNulls": True,
}
suite_panel["description"] = (
"Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
)
suite_panel["description"] = "Trend line per suite. Flat gaps mean no runs in that interval."
panels.append(suite_panel)
status_panel = bargauge_panel(
20,
"Ariadne Schedule Last Status",
ARIADNE_SCHEDULE_LAST_STATUS,
{"h": 8, "w": 8, "x": 0, "y": 35},
unit="none",
instant=True,
legend="{{task}}",
sort_order="asc",
thresholds=schedule_status_thresholds,
decimals=0,
)
status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error."
status_panel["fieldConfig"]["defaults"]["mappings"] = [
{
"type": "value",
"options": {
"0": {"text": "error"},
"1": {"text": "ok"},
},
}
]
panels.append(status_panel)
schedule_runs_panel = bargauge_panel(
21,
"Ariadne Schedule Runs (range)",
ARIADNE_SCHEDULE_RUNS_RANGE,
{"h": 8, "w": 8, "x": 8, "y": 35},
unit="none",
instant=True,
legend="{{task}}",
thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]},
)
schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range."
panels.append(schedule_runs_panel)
schedule_errors_panel = bargauge_panel(
22,
"Ariadne Schedule Errors (range)",
ARIADNE_SCHEDULE_ERRORS_RANGE,
{"h": 8, "w": 8, "x": 16, "y": 35},
unit="none",
instant=True,
legend="{{task}}",
thresholds=task_error_thresholds,
)
schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range."
panels.append(schedule_errors_panel)
return {
"uid": "atlas-jobs",
"title": "Atlas Jobs",
"uid": "atlas-testing",
"title": "Atlas Testing",
"folderUid": PRIVATE_FOLDER,
"editable": True,
"panels": panels,
"time": {"from": "now-7d", "to": "now"},
"time": {"from": "now-30d", "to": "now"},
"annotations": {"list": []},
"schemaVersion": 39,
"style": "dark",
"tags": ["atlas", "jobs", "glue"],
"tags": ["atlas", "testing", "quality"],
}
@ -3529,6 +3746,10 @@ DASHBOARDS = {
"builder": build_jobs_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-jobs.yaml",
},
"atlas-testing": {
"builder": build_testing_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml",
},
"atlas-power": {
"builder": build_power_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-power.yaml",

File diff suppressed because it is too large Load Diff

View File

@ -1915,8 +1915,8 @@
},
"links": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"title": "Open atlas-testing dashboard",
"url": "/d/atlas-testing",
"targetBlank": true
}
],

View File

@ -0,0 +1,462 @@
{
"uid": "atlas-testing",
"title": "Atlas Testing",
"folderUid": "atlas-internal",
"editable": true,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Platform Test Success Rate (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "yellow",
"value": 95
},
{
"color": "green",
"value": 99
}
]
},
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"decimals": 2
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"description": "Overall success rate across tracked suites over the last 30 days."
},
{
"id": 2,
"type": "stat",
"title": "Platform Test Failures (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"targets": [
{
"expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 3
},
{
"color": "red",
"value": 5
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"description": "Total failed runs in the last 24 hours."
},
{
"id": 3,
"type": "table",
"title": "Platform Test Activity (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"targets": [
{
"expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
],
"description": "Suite/status event counts over 30 days."
},
{
"id": 4,
"type": "bargauge",
"title": "Platform Test Failures by Suite (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"targets": [
{
"expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))",
"refId": "A",
"legendFormat": "{{suite}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0,
"max": null,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 3
},
{
"color": "red",
"value": 5
}
]
}
},
"overrides": []
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 5,
"type": "bargauge",
"title": "Platform Test Success Rate by Suite (24h, lowest first)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"targets": [
{
"expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))",
"refId": "A",
"legendFormat": "{{suite}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "yellow",
"value": 95
},
{
"color": "green",
"value": 99
}
]
},
"decimals": 2
},
"overrides": []
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
]
},
{
"id": 6,
"type": "timeseries",
"title": "Platform Test Success Rate by Suite",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"targets": [
{
"refId": "A",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon"
},
{
"refId": "K",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)",
"legendFormat": "data-prepper"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
},
"description": "Trend line per suite. Flat gaps mean no runs in that interval."
}
],
"time": {
"from": "now-30d",
"to": "now"
},
"annotations": {
"list": []
},
"schemaVersion": 39,
"style": "dark",
"tags": [
"atlas",
"testing",
"quality"
]
}

File diff suppressed because it is too large Load Diff

View File

@ -1924,8 +1924,8 @@ data:
},
"links": [
{
"title": "Open atlas-jobs dashboard",
"url": "/d/atlas-jobs",
"title": "Open atlas-testing dashboard",
"url": "/d/atlas-testing",
"targetBlank": true
}
],

View File

@ -0,0 +1,471 @@
# services/monitoring/grafana-dashboard-testing.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-dashboard-testing
labels:
grafana_dashboard: "1"
data:
atlas-testing.json: |
{
"uid": "atlas-testing",
"title": "Atlas Testing",
"folderUid": "atlas-internal",
"editable": true,
"panels": [
{
"id": 1,
"type": "stat",
"title": "Platform Test Success Rate (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 0,
"y": 0
},
"targets": [
{
"expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "yellow",
"value": 95
},
{
"color": "green",
"value": 99
}
]
},
"unit": "percent",
"custom": {
"displayMode": "auto"
},
"decimals": 2
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"description": "Overall success rate across tracked suites over the last 30 days."
},
{
"id": 2,
"type": "stat",
"title": "Platform Test Failures (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 4,
"w": 6,
"x": 6,
"y": 0
},
"targets": [
{
"expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 3
},
{
"color": "red",
"value": 5
}
]
},
"unit": "none",
"custom": {
"displayMode": "auto"
}
},
"overrides": []
},
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "center",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value"
},
"description": "Total failed runs in the last 24 hours."
},
{
"id": 3,
"type": "table",
"title": "Platform Test Activity (30d)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"targets": [
{
"expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))",
"refId": "A",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"custom": {
"filterable": true
}
},
"overrides": []
},
"options": {
"showHeader": true,
"columnFilters": false
},
"transformations": [
{
"id": "labelsToFields",
"options": {}
},
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
],
"description": "Suite/status event counts over 30 days."
},
{
"id": 4,
"type": "bargauge",
"title": "Platform Test Failures by Suite (24h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"targets": [
{
"expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))",
"refId": "A",
"legendFormat": "{{suite}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "none",
"min": 0,
"max": null,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "yellow",
"value": 1
},
{
"color": "orange",
"value": 3
},
{
"color": "red",
"value": 5
}
]
}
},
"overrides": []
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "desc"
}
}
]
},
{
"id": 5,
"type": "bargauge",
"title": "Platform Test Success Rate by Suite (24h, lowest first)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"targets": [
{
"expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))",
"refId": "A",
"legendFormat": "{{suite}}",
"instant": true
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "red",
"value": null
},
{
"color": "orange",
"value": 80
},
{
"color": "yellow",
"value": 95
},
{
"color": "green",
"value": 99
}
]
},
"decimals": 2
},
"overrides": []
},
"options": {
"displayMode": "gradient",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
}
},
"transformations": [
{
"id": "sortBy",
"options": {
"fields": [
"Value"
],
"order": "asc"
}
}
]
},
{
"id": 6,
"type": "timeseries",
"title": "Platform Test Success Rate by Suite",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 16
},
"targets": [
{
"refId": "A",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)",
"legendFormat": "ariadne"
},
{
"refId": "B",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)",
"legendFormat": "metis"
},
{
"refId": "C",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
"legendFormat": "ananke"
},
{
"refId": "D",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
"legendFormat": "atlasbot"
},
{
"refId": "E",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
"legendFormat": "lesavka"
},
{
"refId": "F",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
"legendFormat": "pegasus"
},
{
"refId": "G",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
"legendFormat": "soteria"
},
{
"refId": "H",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
"legendFormat": "titan-iac"
},
{
"refId": "I",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
"legendFormat": "bstein-home"
},
{
"refId": "J",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
"legendFormat": "arcanagon"
},
{
"refId": "K",
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)",
"legendFormat": "data-prepper"
}
],
"fieldConfig": {
"defaults": {
"unit": "percent",
"min": 0,
"max": 100,
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"lineWidth": 2,
"fillOpacity": 10,
"showPoints": "always",
"pointSize": 4,
"spanNulls": true
}
},
"overrides": []
},
"options": {
"legend": {
"displayMode": "list",
"placement": "bottom"
},
"tooltip": {
"mode": "multi"
}
},
"description": "Trend line per suite. Flat gaps mean no runs in that interval."
}
],
"time": {
"from": "now-30d",
"to": "now"
},
"annotations": {
"list": []
},
"schemaVersion": 39,
"style": "dark",
"tags": [
"atlas",
"testing",
"quality"
]
}

View File

@ -16,6 +16,7 @@ resources:
- grafana-dashboard-power.yaml
- grafana-dashboard-mail.yaml
- grafana-dashboard-jobs.yaml
- grafana-dashboard-testing.yaml
- dcgm-exporter.yaml
- jetson-tegrastats-exporter.yaml
- postmark-exporter-service.yaml