monitoring: refresh testing dashboard
This commit is contained in:
parent
a9f6b04baa
commit
698b2fd96b
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,4 +6,5 @@ __pycache__/
|
||||
*.py[cod]
|
||||
.pytest_cache
|
||||
.venv
|
||||
.venv-ci
|
||||
tmp/
|
||||
|
||||
@@ -339,6 +339,9 @@ GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
|
||||
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
|
||||
ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||
@@ -696,8 +699,10 @@ def bargauge_panel(
|
||||
grid,
|
||||
*,
|
||||
unit="none",
|
||||
legend=None,
|
||||
links=None,
|
||||
limit=None,
|
||||
sort_order="desc",
|
||||
thresholds=None,
|
||||
decimals=None,
|
||||
instant=False,
|
||||
@@ -710,7 +715,12 @@ def bargauge_panel(
|
||||
"datasource": PROM_DS,
|
||||
"gridPos": grid,
|
||||
"targets": [
|
||||
{"expr": expr, "refId": "A", "legendFormat": "{{node}}", **({"instant": True} if instant else {})}
|
||||
{
|
||||
"expr": expr,
|
||||
"refId": "A",
|
||||
"legendFormat": legend or "{{node}}",
|
||||
**({"instant": True} if instant else {}),
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
@@ -748,7 +758,7 @@ def bargauge_panel(
|
||||
panel["transformations"] = [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {"fields": ["Value"], "order": "desc"},
|
||||
"options": {"fields": ["Value"], "order": sort_order},
|
||||
}
|
||||
]
|
||||
if limit:
|
||||
@@ -2163,7 +2173,24 @@ def build_mail_dashboard():
|
||||
|
||||
def build_testing_dashboard():
|
||||
panels = []
|
||||
sort_desc = [{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}]
|
||||
age_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 6},
|
||||
{"color": "orange", "value": 24},
|
||||
{"color": "red", "value": 48},
|
||||
],
|
||||
}
|
||||
recent_error_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "orange", "value": 1},
|
||||
{"color": "yellow", "value": 6},
|
||||
{"color": "green", "value": 24},
|
||||
],
|
||||
}
|
||||
|
||||
panels.append(
|
||||
stat_panel(
|
||||
@@ -2184,66 +2211,56 @@ def build_testing_dashboard():
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
stat_panel(
|
||||
2,
|
||||
"Glue Jobs Missing Success",
|
||||
GLUE_MISSING_ACTIVE,
|
||||
{"h": 4, "w": 6, "x": 6, "y": 0},
|
||||
GLUE_MISSING_COUNT,
|
||||
{"h": 4, "w": 4, "x": 4, "y": 0},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
stat_panel(
|
||||
3,
|
||||
"Glue Jobs Suspended",
|
||||
GLUE_SUSPENDED,
|
||||
{"h": 4, "w": 6, "x": 12, "y": 0},
|
||||
GLUE_SUSPENDED_COUNT,
|
||||
{"h": 4, "w": 4, "x": 8, "y": 0},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
stat_panel(
|
||||
4,
|
||||
"Glue Jobs Active Runs",
|
||||
GLUE_ACTIVE,
|
||||
{"h": 4, "w": 6, "x": 18, "y": 0},
|
||||
"Ariadne Task Errors (1h)",
|
||||
ARIADNE_TASK_ERRORS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 12, "y": 0},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
stat_panel(
|
||||
5,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 4},
|
||||
unit="h",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
"Ariadne Task Errors (24h)",
|
||||
ARIADNE_TASK_ERRORS_24H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 16, "y": 0},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
stat_panel(
|
||||
6,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 4},
|
||||
unit="h",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
"Ariadne Task Runs (1h)",
|
||||
ARIADNE_TASK_RUNS_1H_TOTAL,
|
||||
{"h": 4, "w": 4, "x": 20, "y": 0},
|
||||
unit="none",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
timeseries_panel(
|
||||
12,
|
||||
7,
|
||||
"Ariadne Task Runs vs Errors (1h)",
|
||||
ARIADNE_TASK_RUNS_BY_STATUS_1H,
|
||||
{"h": 6, "w": 24, "x": 0, "y": 12},
|
||||
{"h": 6, "w": 24, "x": 0, "y": 4},
|
||||
unit="none",
|
||||
legend="{{status}}",
|
||||
legend_display="table",
|
||||
@@ -2251,55 +2268,110 @@ def build_testing_dashboard():
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
7,
|
||||
bargauge_panel(
|
||||
8,
|
||||
"Ariadne Task Errors (24h)",
|
||||
ARIADNE_TASK_ERRORS_24H,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 18},
|
||||
{"h": 8, "w": 12, "x": 0, "y": 10},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "green", "value": None},
|
||||
{"color": "yellow", "value": 1},
|
||||
{"color": "orange", "value": 3},
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
8,
|
||||
"Ariadne Schedule Last Success (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 18},
|
||||
unit="h",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
bargauge_panel(
|
||||
9,
|
||||
"Ariadne Access Requests",
|
||||
ARIADNE_ACCESS_REQUESTS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 24},
|
||||
"Ariadne Task Success (24h)",
|
||||
ARIADNE_TASK_SUCCESS_24H,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 10},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds={
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "orange", "value": 1},
|
||||
{"color": "yellow", "value": 5},
|
||||
{"color": "green", "value": 10},
|
||||
],
|
||||
},
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
13,
|
||||
bargauge_panel(
|
||||
10,
|
||||
"Ariadne Schedule Last Error (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS,
|
||||
{"h": 6, "w": 12, "x": 0, "y": 24},
|
||||
{"h": 8, "w": 12, "x": 0, "y": 18},
|
||||
unit="h",
|
||||
transformations=sort_desc,
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=recent_error_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Last Success (hours ago)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 18},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=age_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
12,
|
||||
"Glue Jobs Last Success (hours ago)",
|
||||
GLUE_LAST_SUCCESS_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 0, "y": 26},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
13,
|
||||
"Glue Jobs Last Schedule (hours ago)",
|
||||
GLUE_LAST_SCHEDULE_AGE_HOURS,
|
||||
{"h": 8, "w": 12, "x": 12, "y": 26},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{namespace}}/{{cronjob}}",
|
||||
thresholds=age_thresholds,
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
14,
|
||||
"Ariadne Access Requests",
|
||||
ARIADNE_ACCESS_REQUESTS,
|
||||
{"h": 6, "w": 8, "x": 0, "y": 34},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{status}}",
|
||||
)
|
||||
)
|
||||
panels.append(
|
||||
stat_panel(
|
||||
10,
|
||||
15,
|
||||
"Ariadne CI Coverage (%)",
|
||||
ARIADNE_CI_COVERAGE,
|
||||
{"h": 4, "w": 6, "x": 0, "y": 30},
|
||||
{"h": 6, "w": 4, "x": 8, "y": 34},
|
||||
unit="percent",
|
||||
decimals=1,
|
||||
instant=True,
|
||||
@@ -2308,12 +2380,12 @@ def build_testing_dashboard():
|
||||
)
|
||||
panels.append(
|
||||
table_panel(
|
||||
11,
|
||||
16,
|
||||
"Ariadne CI Tests (latest)",
|
||||
ARIADNE_CI_TESTS,
|
||||
{"h": 6, "w": 18, "x": 6, "y": 30},
|
||||
{"h": 6, "w": 12, "x": 12, "y": 34},
|
||||
unit="none",
|
||||
transformations=sort_desc,
|
||||
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||
instant=True,
|
||||
)
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user