monitoring(jobs): split testing dashboard and clean up job ops view
This commit is contained in:
parent
049a0deb04
commit
299a68ad95
@ -428,13 +428,59 @@ ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
|
||||
# --- Ariadne schedule queries -------------------------------------------------
# Every selector below is scoped with ARIADNE_SCHEDULE_TASK_FILTER. Queries that
# use `$__range` are evaluated over the dashboard's selected time range.

# Hours since the most recent error timestamp seen within the dashboard range.
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
    "(time() - max_over_time("
    f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    "[$__range])) / 3600"
)

# Hours between now and each task's next-run timestamp.
ARIADNE_SCHEDULE_NEXT_RUN_HOURS = (
    f"((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    " - time()) / 3600)"
)

# Raw last-status gauge per task (the jobs dashboard maps 1 -> ok, 0 -> error).
ARIADNE_SCHEDULE_LAST_STATUS = (
    f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
)

# Number of last-success series exported; falls back to 0 when none exist.
ARIADNE_SCHEDULE_SIGNAL_COUNT = (
    f"count(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}})"
    " or on() vector(0)"
)

# A task counts as stale once its last success is older than this many seconds (36 h).
ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 60 * 60

# Count of tasks whose last success is older than the stale window (0 when no data).
ARIADNE_SCHEDULE_STALE_COUNT = (
    "sum(((time() - "
    f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    f") > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC}))"
    " or on() vector(0)"
)

# Count of tasks that export a next-run timestamp but no last-success timestamp.
ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT = (
    "count(("
    f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    " unless on(task) "
    f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    ")) or on() vector(0)"
)

# Count of tasks whose last-status gauge is below 1, i.e. (1 - status) > 0.
ARIADNE_SCHEDULE_FAILED_LAST_COUNT = (
    f"sum(((1 - ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) > bool 0))"
    " or on() vector(0)"
)

# Runs per task over the dashboard range.
ARIADNE_SCHEDULE_RUNS_RANGE = (
    "sum by (task) (increase("
    f"ariadne_task_runs_total{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
    "[$__range]))"
)

# Runs per task over the dashboard range, restricted to status="error".
ARIADNE_SCHEDULE_ERRORS_RANGE = (
    "sum by (task) (increase("
    f'ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}'
    "[$__range]))"
)
|
||||
# --- Jenkins workspace cleanup queries ----------------------------------------
# `or on() vector(N)` supplies a fallback sample when the left-hand side returns
# nothing, so the consuming panel always has a value to show.

# Number of cleanup last-run series present; 0 when the metric is not exported.
JENKINS_CLEANUP_SIGNAL_COUNT = (
    "count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) or on() vector(0)"
)

# Cleanup runs over the dashboard range, split by mode and status.
JENKINS_CLEANUP_RUNS_RANGE = (
    "sum by (mode, status) (increase(ariadne_jenkins_workspace_cleanup_runs_total[$__range]))"
)

# Objects handled by cleanup over the dashboard range, split by kind, action and mode.
JENKINS_CLEANUP_OBJECTS_RANGE = (
    "sum by (kind, action, mode) (increase(ariadne_jenkins_workspace_cleanup_objects_total[$__range]))"
)

# Hours since the last cleanup run; 999 is substituted when the metric is absent.
JENKINS_CLEANUP_LAST_RUN_AGE_HOURS = (
    "((time() - ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds) / 3600) or on() vector(999)"
)

# Hours since the last successful cleanup; 999 is substituted when the metric is absent.
JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS = (
    "((time() - ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds) / 3600) or on() vector(999)"
)

# Deleted / planned totals reported for the last run; 0 when the metric is absent.
JENKINS_CLEANUP_LAST_DELETED = "ariadne_jenkins_workspace_cleanup_last_deleted_total or on() vector(0)"
JENKINS_CLEANUP_LAST_PLANNED = "ariadne_jenkins_workspace_cleanup_last_planned_total or on() vector(0)"

# Count of Released/Failed PVs whose claim ref is a Jenkins `pvc-workspace-*` PVC.
# `> bool 0` turns each phase series into 0/1, so summing the joined product
# counts only PVs currently in one of those phases.
JENKINS_WORKSPACE_PV_STALE_COUNT = (
    'sum((kube_persistentvolume_status_phase{phase=~"Released|Failed"} > bool 0) '
    '* on(persistentvolume) group_left(claim_namespace,name) '
    'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"}) or on() vector(0)'
)

# Age in hours of each Jenkins workspace PV that is currently Released or Failed.
# The phase restriction is applied with `and ... == 1` rather than a
# `* group_left() (... > bool 0)` join: the `bool` form keeps both the
# phase="Released" and phase="Failed" series for every PV, which puts duplicate
# series on the "one" side of the match (a query error) and would otherwise
# report bound PVs with an age of 0 instead of excluding them.
JENKINS_WORKSPACE_PV_STALE_AGE_HOURS = (
    '((time() - kube_persistentvolume_created) / 3600) '
    '* on(persistentvolume) group_left(claim_namespace,name) '
    'kube_persistentvolume_claim_ref{claim_namespace="jenkins",name=~"pvc-workspace-.*"} '
    'and on(persistentvolume) (kube_persistentvolume_status_phase{phase=~"Released|Failed"} == 1)'
)
|
||||
# Bare selector for the access-request counter; no aggregation or range is applied here.
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||
PLATFORM_TEST_SUITE_NAMES = [
|
||||
"ariadne",
|
||||
@ -1603,7 +1649,7 @@ def build_overview():
|
||||
legend_display="table",
|
||||
legend_placement="right",
|
||||
legend_calcs=["lastNotNull"],
|
||||
links=link_to("atlas-jobs"),
|
||||
links=link_to("atlas-testing"),
|
||||
)
|
||||
test_success["fieldConfig"]["defaults"]["min"] = 0
|
||||
test_success["fieldConfig"]["defaults"]["max"] = 100
|
||||
@ -2903,6 +2949,15 @@ def build_jobs_dashboard():
|
||||
{"color": "red", "value": 48},
|
||||
],
|
||||
}
|
||||
old_age_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 24},
|
||||
{"color": "orange", "value": 72},
|
||||
{"color": "red", "value": 168},
|
||||
],
|
||||
}
|
||||
recent_error_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
@ -2912,7 +2967,6 @@ def build_jobs_dashboard():
|
||||
{"color": "green", "value": 24},
|
||||
],
|
||||
}
|
||||
|
||||
task_error_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
@ -2922,6 +2976,15 @@ def build_jobs_dashboard():
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
}
|
||||
count_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 2},
|
||||
{"color": "red", "value": 3},
|
||||
],
|
||||
}
|
||||
schedule_status_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
@ -2932,24 +2995,162 @@ def build_jobs_dashboard():
|
||||
}
|
||||
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
stat_panel(
|
||||
1,
|
||||
"Ariadne Task Errors (range)",
|
||||
ARIADNE_TASK_ERRORS_RANGE,
|
||||
{"h": 7, "w": 8, "x": 0, "y": 0},
|
||||
"Schedule Metrics Exported",
|
||||
ARIADNE_SCHEDULE_SIGNAL_COUNT,
|
||||
{"h": 4, "w": 4, "x": 0, "y": 0},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "green", "value": 1},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
2,
|
||||
"Schedule Tasks Stale (>36h)",
|
||||
ARIADNE_SCHEDULE_STALE_COUNT,
|
||||
{"h": 4, "w": 4, "x": 4, "y": 0},
|
||||
unit="none",
|
||||
thresholds=count_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
3,
|
||||
"Schedule Tasks Missing Success",
|
||||
ARIADNE_SCHEDULE_MISSING_SUCCESS_COUNT,
|
||||
{"h": 4, "w": 4, "x": 8, "y": 0},
|
||||
unit="none",
|
||||
thresholds=count_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
4,
|
||||
"Schedule Tasks Failed Last Run",
|
||||
ARIADNE_SCHEDULE_FAILED_LAST_COUNT,
|
||||
{"h": 4, "w": 4, "x": 12, "y": 0},
|
||||
unit="none",
|
||||
thresholds=count_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
5,
|
||||
"Glue Jobs Stale (>36h)",
|
||||
GLUE_STALE_COUNT,
|
||||
{"h": 4, "w": 4, "x": 16, "y": 0},
|
||||
unit="none",
|
||||
thresholds=count_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
6,
|
||||
"Jenkins Workspace PV Backlog",
|
||||
JENKINS_WORKSPACE_PV_STALE_COUNT,
|
||||
{"h": 4, "w": 4, "x": 20, "y": 0},
|
||||
unit="none",
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 10},
|
||||
{"color": "red", "value": 25},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
schedule_list_panel = table_panel(
|
||||
7,
|
||||
"Ariadne Schedules: Last Success (h, newest first)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 4},
|
||||
unit="h",
|
||||
transformations=[
|
||||
{"id": "labelsToFields", "options": {}},
|
||||
{"id": "sortBy", "options": {"fields": ["Value"], "order": "asc"}},
|
||||
],
|
||||
instant=True,
|
||||
)
|
||||
schedule_list_panel["description"] = "Primary schedule inventory ordered by recency so fresh jobs stay at the top."
|
||||
panels.append(schedule_list_panel)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
8,
|
||||
"Ariadne Schedule Last Error (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 4},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=recent_error_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
status_panel = bargauge_panel(
|
||||
9,
|
||||
"Ariadne Schedule Last Status",
|
||||
ARIADNE_SCHEDULE_LAST_STATUS,
|
||||
{"h": 8, "w": 8, "x": 0, "y": 12},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=schedule_status_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error."
|
||||
status_panel["fieldConfig"]["defaults"]["mappings"] = [
|
||||
{
|
||||
"id": 2,
|
||||
"type": "value",
|
||||
"options": {
|
||||
"0": {"text": "error"},
|
||||
"1": {"text": "ok"},
|
||||
},
|
||||
}
|
||||
]
|
||||
panels.append(status_panel)
|
||||
schedule_runs_panel = bargauge_panel(
|
||||
10,
|
||||
"Ariadne Schedule Runs (range)",
|
||||
ARIADNE_SCHEDULE_RUNS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 8, "y": 12},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]},
|
||||
)
|
||||
schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_runs_panel)
|
||||
schedule_errors_panel = bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Errors (range)",
|
||||
ARIADNE_SCHEDULE_ERRORS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 16, "y": 12},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_errors_panel)
|
||||
panels.append(
|
||||
{
|
||||
"id": 12,
|
||||
"type": "timeseries",
|
||||
"title": "Ariadne Attempts / Failures",
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": {"h": 7, "w": 8, "x": 8, "y": 0},
|
||||
"gridPos": {"h": 7, "w": 12, "x": 0, "y": 20},
|
||||
"targets": [
|
||||
{"expr": ARIADNE_TASK_ATTEMPTS_SERIES, "refId": "A", "legendFormat": "Attempts"},
|
||||
{"expr": ARIADNE_TASK_FAILURES_SERIES, "refId": "B", "legendFormat": "Failures"},
|
||||
@ -2979,10 +3180,10 @@ def build_jobs_dashboard():
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
3,
|
||||
13,
|
||||
"One-off Job Pods (age hours)",
|
||||
ONEOFF_JOB_POD_AGE_HOURS,
|
||||
{"h": 7, "w": 8, "x": 16, "y": 0},
|
||||
{"h": 7, "w": 12, "x": 12, "y": 20},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{pod}}",
|
||||
@ -2991,186 +3192,251 @@ def build_jobs_dashboard():
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
14,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 27},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
15,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 27},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
4,
|
||||
"Glue Jobs Stale (>36h)",
|
||||
GLUE_STALE_COUNT,
|
||||
{"h": 4, "w": 4, "x": 0, "y": 7},
|
||||
16,
|
||||
"Jenkins Cleanup Signal Present",
|
||||
JENKINS_CLEANUP_SIGNAL_COUNT,
|
||||
{"h": 4, "w": 4, "x": 0, "y": 33},
|
||||
unit="none",
|
||||
instant=True,
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 2},
|
||||
{"color": "red", "value": 3},
|
||||
{"color": "red", "value": None},
|
||||
{"color": "green", "value": 1},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
5,
|
||||
"Glue Jobs Missing Success",
|
||||
GLUE_MISSING_COUNT,
|
||||
{"h": 4, "w": 4, "x": 4, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
6,
|
||||
"Glue Jobs Suspended",
|
||||
GLUE_SUSPENDED_COUNT,
|
||||
{"h": 4, "w": 4, "x": 8, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
7,
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 12, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
8,
|
||||
"Ariadne Task Errors (24h)",
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 16, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
9,
|
||||
"Ariadne Task Runs (1h)",
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 20, "y": 7},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
10,
|
||||
"Ariadne Schedule Last Error (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 17},
|
||||
17,
|
||||
"Jenkins Cleanup Last Run Age (h)",
|
||||
JENKINS_CLEANUP_LAST_RUN_AGE_HOURS,
|
||||
{"h": 4, "w": 4, "x": 4, "y": 33},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=recent_error_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Last Success (hours ago, newest first)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
12,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 23},
|
||||
stat_panel(
|
||||
18,
|
||||
"Jenkins Cleanup Last Success Age (h)",
|
||||
JENKINS_CLEANUP_LAST_SUCCESS_AGE_HOURS,
|
||||
{"h": 4, "w": 4, "x": 8, "y": 33},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
instant=True,
|
||||
thresholds=age_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
13,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 23},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
14,
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 29},
|
||||
stat_panel(
|
||||
19,
|
||||
"Jenkins Cleanup Planned (last run)",
|
||||
JENKINS_CLEANUP_LAST_PLANNED,
|
||||
{"h": 4, "w": 4, "x": 12, "y": 33},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
15,
|
||||
"Ariadne Task Errors (30d)",
|
||||
ARIADNE_TASK_ERRORS_30D,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 29},
|
||||
stat_panel(
|
||||
20,
|
||||
"Jenkins Cleanup Deleted (last run)",
|
||||
JENKINS_CLEANUP_LAST_DELETED,
|
||||
{"h": 4, "w": 4, "x": 16, "y": 33},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
16,
|
||||
stat_panel(
|
||||
21,
|
||||
"Ariadne Access Requests",
|
||||
ARIADNE_ACCESS_REQUESTS,
|
||||
{"h": 6, "w": 8, "x": 0, "y": 11},
|
||||
{"h": 4, "w": 4, "x": 20, "y": 33},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{status}}",
|
||||
)
|
||||
)
|
||||
coverage_panel = stat_panel(
|
||||
17,
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
22,
|
||||
"Jenkins Cleanup Runs (range)",
|
||||
None,
|
||||
{"h": 7, "w": 12, "x": 0, "y": 37},
|
||||
unit="none",
|
||||
targets=[
|
||||
{"refId": "A", "expr": JENKINS_CLEANUP_RUNS_RANGE, "legendFormat": "{{mode}}/{{status}}"},
|
||||
],
|
||||
legend_display="table",
|
||||
legend_placement="right",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
23,
|
||||
"Jenkins Cleanup Objects (range)",
|
||||
None,
|
||||
{"h": 7, "w": 12, "x": 12, "y": 37},
|
||||
unit="none",
|
||||
targets=[
|
||||
{"refId": "A", "expr": JENKINS_CLEANUP_OBJECTS_RANGE, "legendFormat": "{{kind}}/{{action}}/{{mode}}"},
|
||||
],
|
||||
legend_display="table",
|
||||
legend_placement="right",
|
||||
)
|
||||
)
|
||||
stale_volume_panel = bargauge_panel(
|
||||
24,
|
||||
"Jenkins Workspace PV Age (h, detached only)",
|
||||
JENKINS_WORKSPACE_PV_STALE_AGE_HOURS,
|
||||
{"h": 10, "w": 24, "x": 0, "y": 44},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{name}} -> {{persistentvolume}}",
|
||||
thresholds=old_age_thresholds,
|
||||
decimals=1,
|
||||
limit=40,
|
||||
)
|
||||
stale_volume_panel["description"] = (
|
||||
"Oldest detached Jenkins workspace volumes first. This is the direct cleanup backlog view."
|
||||
)
|
||||
panels.append(stale_volume_panel)
|
||||
|
||||
return {
|
||||
"uid": "atlas-jobs",
|
||||
"title": "Atlas Jobs",
|
||||
"folderUid": PRIVATE_FOLDER,
|
||||
"editable": True,
|
||||
"panels": panels,
|
||||
"time": {"from": "now-7d", "to": "now"},
|
||||
"annotations": {"list": []},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["atlas", "jobs", "glue"],
|
||||
}
|
||||
|
||||
|
||||
def build_testing_dashboard():
|
||||
panels = []
|
||||
pass_rate_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "orange", "value": 80},
|
||||
{"color": "yellow", "value": 95},
|
||||
{"color": "green", "value": 99},
|
||||
],
|
||||
}
|
||||
failures_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 3},
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
}
|
||||
|
||||
pass_rate_panel = stat_panel(
|
||||
1,
|
||||
"Platform Test Success Rate (30d)",
|
||||
TEST_SUCCESS_RATE,
|
||||
{"h": 6, "w": 4, "x": 8, "y": 11},
|
||||
{"h": 4, "w": 6, "x": 0, "y": 0},
|
||||
unit="percent",
|
||||
decimals=2,
|
||||
instant=True,
|
||||
thresholds=pass_rate_thresholds,
|
||||
)
|
||||
coverage_panel["description"] = "Internal rollup across Ariadne task runs and Metis build/flash outcomes."
|
||||
panels.append(coverage_panel)
|
||||
tests_panel = table_panel(
|
||||
18,
|
||||
pass_rate_panel["description"] = "Overall success rate across tracked suites over the last 30 days."
|
||||
panels.append(pass_rate_panel)
|
||||
failures_panel = stat_panel(
|
||||
2,
|
||||
"Platform Test Failures (24h)",
|
||||
TEST_FAILURES_24H_TOTAL,
|
||||
{"h": 4, "w": 6, "x": 6, "y": 0},
|
||||
unit="none",
|
||||
instant=True,
|
||||
thresholds=failures_thresholds,
|
||||
)
|
||||
failures_panel["description"] = "Total failed runs in the last 24 hours."
|
||||
panels.append(failures_panel)
|
||||
activity_panel = table_panel(
|
||||
3,
|
||||
"Platform Test Activity (30d)",
|
||||
PLATFORM_TEST_ACTIVITY_30D,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 11},
|
||||
{"h": 8, "w": 12, "x": 12, "y": 0},
|
||||
unit="none",
|
||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||
transformations=[
|
||||
{"id": "labelsToFields", "options": {}},
|
||||
{"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
|
||||
],
|
||||
instant=True,
|
||||
)
|
||||
tests_panel["description"] = (
|
||||
"Atlas Overview test panels depend on this internal activity table sourced from Ariadne and Metis counters."
|
||||
activity_panel["description"] = "Suite/status event counts over 30 days."
|
||||
panels.append(activity_panel)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
4,
|
||||
"Platform Test Failures by Suite (24h)",
|
||||
PLATFORM_TEST_FAILURES_24H_BY_SUITE,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 8},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
thresholds=failures_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
5,
|
||||
"Platform Test Success Rate by Suite (24h, lowest first)",
|
||||
PLATFORM_TEST_SUCCESS_RATE_24H_BY_SUITE,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 8},
|
||||
unit="percent",
|
||||
instant=True,
|
||||
legend="{{suite}}",
|
||||
sort_order="asc",
|
||||
thresholds=pass_rate_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
)
|
||||
panels.append(tests_panel)
|
||||
suite_panel = timeseries_panel(
|
||||
19,
|
||||
6,
|
||||
"Platform Test Success Rate by Suite",
|
||||
None,
|
||||
{"h": 6, "w": 16, "x": 8, "y": 17},
|
||||
{"h": 8, "w": 24, "x": 0, "y": 16},
|
||||
unit="percent",
|
||||
targets=PLATFORM_TEST_SUCCESS_RATE_SUITE_TARGETS,
|
||||
legend_display="list",
|
||||
@ -3187,69 +3453,20 @@ def build_jobs_dashboard():
|
||||
"pointSize": 4,
|
||||
"spanNulls": True,
|
||||
}
|
||||
suite_panel["description"] = (
|
||||
"Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
|
||||
)
|
||||
suite_panel["description"] = "Trend line per suite. Flat gaps mean no runs in that interval."
|
||||
panels.append(suite_panel)
|
||||
status_panel = bargauge_panel(
|
||||
20,
|
||||
"Ariadne Schedule Last Status",
|
||||
ARIADNE_SCHEDULE_LAST_STATUS,
|
||||
{"h": 8, "w": 8, "x": 0, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=schedule_status_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error."
|
||||
status_panel["fieldConfig"]["defaults"]["mappings"] = [
|
||||
{
|
||||
"type": "value",
|
||||
"options": {
|
||||
"0": {"text": "error"},
|
||||
"1": {"text": "ok"},
|
||||
},
|
||||
}
|
||||
]
|
||||
panels.append(status_panel)
|
||||
schedule_runs_panel = bargauge_panel(
|
||||
21,
|
||||
"Ariadne Schedule Runs (range)",
|
||||
ARIADNE_SCHEDULE_RUNS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 8, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]},
|
||||
)
|
||||
schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_runs_panel)
|
||||
schedule_errors_panel = bargauge_panel(
|
||||
22,
|
||||
"Ariadne Schedule Errors (range)",
|
||||
ARIADNE_SCHEDULE_ERRORS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 16, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_errors_panel)
|
||||
|
||||
return {
|
||||
"uid": "atlas-jobs",
|
||||
"title": "Atlas Jobs",
|
||||
"uid": "atlas-testing",
|
||||
"title": "Atlas Testing",
|
||||
"folderUid": PRIVATE_FOLDER,
|
||||
"editable": True,
|
||||
"panels": panels,
|
||||
"time": {"from": "now-7d", "to": "now"},
|
||||
"time": {"from": "now-30d", "to": "now"},
|
||||
"annotations": {"list": []},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": ["atlas", "jobs", "glue"],
|
||||
"tags": ["atlas", "testing", "quality"],
|
||||
}
|
||||
|
||||
|
||||
@ -3529,6 +3746,10 @@ DASHBOARDS = {
|
||||
"builder": build_jobs_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-jobs.yaml",
|
||||
},
|
||||
"atlas-testing": {
|
||||
"builder": build_testing_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml",
|
||||
},
|
||||
"atlas-power": {
|
||||
"builder": build_power_dashboard,
|
||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-power.yaml",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -1915,8 +1915,8 @@
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-jobs dashboard",
|
||||
"url": "/d/atlas-jobs",
|
||||
"title": "Open atlas-testing dashboard",
|
||||
"url": "/d/atlas-testing",
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
|
||||
462
services/monitoring/dashboards/atlas-testing.json
Normal file
462
services/monitoring/dashboards/atlas-testing.json
Normal file
@ -0,0 +1,462 @@
|
||||
{
|
||||
"uid": "atlas-testing",
|
||||
"title": "Atlas Testing",
|
||||
"folderUid": "atlas-internal",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Platform Test Success Rate (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 80
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 95
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 99
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Overall success rate across tracked suites over the last 30 days."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Platform Test Failures (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Total failed runs in the last 24 hours."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "table",
|
||||
"title": "Platform Test Activity (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Suite/status event counts over 30 days."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "bargauge",
|
||||
"title": "Platform Test Failures by Suite (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{suite}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "bargauge",
|
||||
"title": "Platform Test Success Rate by Suite (24h, lowest first)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{suite}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 80
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 95
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 99
|
||||
}
|
||||
]
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "timeseries",
|
||||
"title": "Platform Test Success Rate by Suite",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)",
|
||||
"legendFormat": "ariadne"
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)",
|
||||
"legendFormat": "metis"
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
|
||||
"legendFormat": "ananke"
|
||||
},
|
||||
{
|
||||
"refId": "D",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"legendFormat": "atlasbot"
|
||||
},
|
||||
{
|
||||
"refId": "E",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"legendFormat": "lesavka"
|
||||
},
|
||||
{
|
||||
"refId": "F",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"legendFormat": "pegasus"
|
||||
},
|
||||
{
|
||||
"refId": "G",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
|
||||
"legendFormat": "soteria"
|
||||
},
|
||||
{
|
||||
"refId": "H",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"legendFormat": "titan-iac"
|
||||
},
|
||||
{
|
||||
"refId": "I",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"legendFormat": "bstein-home"
|
||||
},
|
||||
{
|
||||
"refId": "J",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"legendFormat": "arcanagon"
|
||||
},
|
||||
{
|
||||
"refId": "K",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)",
|
||||
"legendFormat": "data-prepper"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "always",
|
||||
"pointSize": 4,
|
||||
"spanNulls": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"description": "Trend line per suite. Flat gaps mean no runs in that interval."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-30d",
|
||||
"to": "now"
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"atlas",
|
||||
"testing",
|
||||
"quality"
|
||||
]
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@ -1924,8 +1924,8 @@ data:
|
||||
},
|
||||
"links": [
|
||||
{
|
||||
"title": "Open atlas-jobs dashboard",
|
||||
"url": "/d/atlas-jobs",
|
||||
"title": "Open atlas-testing dashboard",
|
||||
"url": "/d/atlas-testing",
|
||||
"targetBlank": true
|
||||
}
|
||||
],
|
||||
|
||||
471
services/monitoring/grafana-dashboard-testing.yaml
Normal file
471
services/monitoring/grafana-dashboard-testing.yaml
Normal file
@ -0,0 +1,471 @@
|
||||
# services/monitoring/grafana-dashboard-testing.yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: grafana-dashboard-testing
|
||||
labels:
|
||||
grafana_dashboard: "1"
|
||||
data:
|
||||
atlas-testing.json: |
|
||||
{
|
||||
"uid": "atlas-testing",
|
||||
"title": "Atlas Testing",
|
||||
"folderUid": "atlas-internal",
|
||||
"editable": true,
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "stat",
|
||||
"title": "Platform Test Success Rate (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "100 * ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[30d])) or on() vector(0))) / clamp_min(((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d])) or on() vector(0))), 1)",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 80
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 95
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 99
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "percent",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Overall success rate across tracked suites over the last 30 days."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "stat",
|
||||
"title": "Platform Test Failures (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 4,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "(sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])) or on() vector(0))",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"displayMode": "auto"
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "center",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value"
|
||||
},
|
||||
"description": "Total failed runs in the last 24 hours."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"type": "table",
|
||||
"title": "Platform Test Activity (30d)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (suite, status) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[30d]))",
|
||||
"refId": "A",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"custom": {
|
||||
"filterable": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"showHeader": true,
|
||||
"columnFilters": false
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "labelsToFields",
|
||||
"options": {}
|
||||
},
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Suite/status event counts over 30 days."
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"type": "bargauge",
|
||||
"title": "Platform Test Failures by Suite (24h)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status!~\"ok|passed|success\"}[24h])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{suite}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "bargauge",
|
||||
"title": "Platform Test Success Rate by Suite (24h, lowest first)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\",status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total{suite=~\"ariadne|metis|ananke|atlasbot|lesavka|pegasus|soteria|titan-iac|bstein-home|arcanagon|data-prepper\"}[24h]))) > 0))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{suite}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 80
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 95
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 99
|
||||
}
|
||||
]
|
||||
},
|
||||
"decimals": 2
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "timeseries",
|
||||
"title": "Platform Test Success Rate by Suite",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"refId": "A",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0)",
|
||||
"legendFormat": "ariadne"
|
||||
},
|
||||
{
|
||||
"refId": "B",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0)",
|
||||
"legendFormat": "metis"
|
||||
},
|
||||
{
|
||||
"refId": "C",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
|
||||
"legendFormat": "ananke"
|
||||
},
|
||||
{
|
||||
"refId": "D",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"legendFormat": "atlasbot"
|
||||
},
|
||||
{
|
||||
"refId": "E",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"legendFormat": "lesavka"
|
||||
},
|
||||
{
|
||||
"refId": "F",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"legendFormat": "pegasus"
|
||||
},
|
||||
{
|
||||
"refId": "G",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
|
||||
"legendFormat": "soteria"
|
||||
},
|
||||
{
|
||||
"refId": "H",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"legendFormat": "titan-iac"
|
||||
},
|
||||
{
|
||||
"refId": "I",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"legendFormat": "bstein-home"
|
||||
},
|
||||
{
|
||||
"refId": "J",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"legendFormat": "arcanagon"
|
||||
},
|
||||
{
|
||||
"refId": "K",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0)",
|
||||
"legendFormat": "data-prepper"
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 2,
|
||||
"fillOpacity": 10,
|
||||
"showPoints": "always",
|
||||
"pointSize": 4,
|
||||
"spanNulls": true
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi"
|
||||
}
|
||||
},
|
||||
"description": "Trend line per suite. Flat gaps mean no runs in that interval."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
"from": "now-30d",
|
||||
"to": "now"
|
||||
},
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"atlas",
|
||||
"testing",
|
||||
"quality"
|
||||
]
|
||||
}
|
||||
@ -16,6 +16,7 @@ resources:
|
||||
- grafana-dashboard-power.yaml
|
||||
- grafana-dashboard-mail.yaml
|
||||
- grafana-dashboard-jobs.yaml
|
||||
- grafana-dashboard-testing.yaml
|
||||
- dcgm-exporter.yaml
|
||||
- jetson-tegrastats-exporter.yaml
|
||||
- postmark-exporter-service.yaml
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user