monitoring: refresh testing dashboard
This commit is contained in:
parent
a9f6b04baa
commit
698b2fd96b
1
.gitignore
vendored
1
.gitignore
vendored
@ -6,4 +6,5 @@ __pycache__/
|
|||||||
*.py[cod]
|
*.py[cod]
|
||||||
.pytest_cache
|
.pytest_cache
|
||||||
.venv
|
.venv
|
||||||
|
.venv-ci
|
||||||
tmp/
|
tmp/
|
||||||
|
|||||||
@ -339,6 +339,9 @@ GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
|
|||||||
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
|
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||||
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
|
ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))'
|
||||||
ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))'
|
ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))'
|
||||||
|
ARIADNE_TASK_ERRORS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[1h]))'
|
||||||
|
ARIADNE_TASK_ERRORS_24H_TOTAL = 'sum(increase(ariadne_task_runs_total{status="error"}[24h]))'
|
||||||
|
ARIADNE_TASK_RUNS_1H_TOTAL = 'sum(increase(ariadne_task_runs_total[1h]))'
|
||||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
||||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
||||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||||
@ -696,8 +699,10 @@ def bargauge_panel(
|
|||||||
grid,
|
grid,
|
||||||
*,
|
*,
|
||||||
unit="none",
|
unit="none",
|
||||||
|
legend=None,
|
||||||
links=None,
|
links=None,
|
||||||
limit=None,
|
limit=None,
|
||||||
|
sort_order="desc",
|
||||||
thresholds=None,
|
thresholds=None,
|
||||||
decimals=None,
|
decimals=None,
|
||||||
instant=False,
|
instant=False,
|
||||||
@ -710,7 +715,12 @@ def bargauge_panel(
|
|||||||
"datasource": PROM_DS,
|
"datasource": PROM_DS,
|
||||||
"gridPos": grid,
|
"gridPos": grid,
|
||||||
"targets": [
|
"targets": [
|
||||||
{"expr": expr, "refId": "A", "legendFormat": "{{node}}", **({"instant": True} if instant else {})}
|
{
|
||||||
|
"expr": expr,
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": legend or "{{node}}",
|
||||||
|
**({"instant": True} if instant else {}),
|
||||||
|
}
|
||||||
],
|
],
|
||||||
"fieldConfig": {
|
"fieldConfig": {
|
||||||
"defaults": {
|
"defaults": {
|
||||||
@ -748,7 +758,7 @@ def bargauge_panel(
|
|||||||
panel["transformations"] = [
|
panel["transformations"] = [
|
||||||
{
|
{
|
||||||
"id": "sortBy",
|
"id": "sortBy",
|
||||||
"options": {"fields": ["Value"], "order": "desc"},
|
"options": {"fields": ["Value"], "order": sort_order},
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
if limit:
|
if limit:
|
||||||
@ -2163,7 +2173,24 @@ def build_mail_dashboard():
|
|||||||
|
|
||||||
def build_testing_dashboard():
|
def build_testing_dashboard():
|
||||||
panels = []
|
panels = []
|
||||||
sort_desc = [{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}]
|
age_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 6},
|
||||||
|
{"color": "orange", "value": 24},
|
||||||
|
{"color": "red", "value": 48},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
recent_error_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "red", "value": None},
|
||||||
|
{"color": "orange", "value": 1},
|
||||||
|
{"color": "yellow", "value": 6},
|
||||||
|
{"color": "green", "value": 24},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
panels.append(
|
panels.append(
|
||||||
stat_panel(
|
stat_panel(
|
||||||
@ -2184,66 +2211,56 @@ def build_testing_dashboard():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
stat_panel(
|
||||||
2,
|
2,
|
||||||
"Glue Jobs Missing Success",
|
"Glue Jobs Missing Success",
|
||||||
GLUE_MISSING_ACTIVE,
|
GLUE_MISSING_COUNT,
|
||||||
{"h": 4, "w": 6, "x": 6, "y": 0},
|
{"h": 4, "w": 4, "x": 4, "y": 0},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
stat_panel(
|
||||||
3,
|
3,
|
||||||
"Glue Jobs Suspended",
|
"Glue Jobs Suspended",
|
||||||
GLUE_SUSPENDED,
|
GLUE_SUSPENDED_COUNT,
|
||||||
{"h": 4, "w": 6, "x": 12, "y": 0},
|
{"h": 4, "w": 4, "x": 8, "y": 0},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
stat_panel(
|
||||||
4,
|
4,
|
||||||
"Glue Jobs Active Runs",
|
"Ariadne Task Errors (1h)",
|
||||||
GLUE_ACTIVE,
|
ARIADNE_TASK_ERRORS_1H_TOTAL,
|
||||||
{"h": 4, "w": 6, "x": 18, "y": 0},
|
{"h": 4, "w": 4, "x": 12, "y": 0},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
stat_panel(
|
||||||
5,
|
5,
|
||||||
"Glue Jobs Last Success (hours ago)",
|
"Ariadne Task Errors (24h)",
|
||||||
GLUE_LAST_SUCCESS_AGE_HOURS,
|
ARIADNE_TASK_ERRORS_24H_TOTAL,
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 4},
|
{"h": 4, "w": 4, "x": 16, "y": 0},
|
||||||
unit="h",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
stat_panel(
|
||||||
6,
|
6,
|
||||||
"Glue Jobs Last Schedule (hours ago)",
|
"Ariadne Task Runs (1h)",
|
||||||
GLUE_LAST_SCHEDULE_AGE_HOURS,
|
ARIADNE_TASK_RUNS_1H_TOTAL,
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 4},
|
{"h": 4, "w": 4, "x": 20, "y": 0},
|
||||||
unit="h",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
timeseries_panel(
|
timeseries_panel(
|
||||||
12,
|
7,
|
||||||
"Ariadne Task Runs vs Errors (1h)",
|
"Ariadne Task Runs vs Errors (1h)",
|
||||||
ARIADNE_TASK_RUNS_BY_STATUS_1H,
|
ARIADNE_TASK_RUNS_BY_STATUS_1H,
|
||||||
{"h": 6, "w": 24, "x": 0, "y": 12},
|
{"h": 6, "w": 24, "x": 0, "y": 4},
|
||||||
unit="none",
|
unit="none",
|
||||||
legend="{{status}}",
|
legend="{{status}}",
|
||||||
legend_display="table",
|
legend_display="table",
|
||||||
@ -2251,55 +2268,110 @@ def build_testing_dashboard():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
bargauge_panel(
|
||||||
7,
|
8,
|
||||||
"Ariadne Task Errors (24h)",
|
"Ariadne Task Errors (24h)",
|
||||||
ARIADNE_TASK_ERRORS_24H,
|
ARIADNE_TASK_ERRORS_24H,
|
||||||
{"h": 6, "w": 12, "x": 0, "y": 18},
|
{"h": 8, "w": 12, "x": 0, "y": 10},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
instant=True,
|
||||||
|
legend="{{task}}",
|
||||||
|
thresholds={
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 1},
|
||||||
|
{"color": "orange", "value": 3},
|
||||||
|
{"color": "red", "value": 5},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
bargauge_panel(
|
||||||
8,
|
|
||||||
"Ariadne Schedule Last Success (hours ago)",
|
|
||||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
|
|
||||||
{"h": 6, "w": 12, "x": 12, "y": 18},
|
|
||||||
unit="h",
|
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
panels.append(
|
|
||||||
table_panel(
|
|
||||||
9,
|
9,
|
||||||
"Ariadne Access Requests",
|
"Ariadne Task Success (24h)",
|
||||||
ARIADNE_ACCESS_REQUESTS,
|
ARIADNE_TASK_SUCCESS_24H,
|
||||||
{"h": 6, "w": 12, "x": 12, "y": 24},
|
{"h": 8, "w": 12, "x": 12, "y": 10},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
instant=True,
|
||||||
|
legend="{{task}}",
|
||||||
|
thresholds={
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "red", "value": None},
|
||||||
|
{"color": "orange", "value": 1},
|
||||||
|
{"color": "yellow", "value": 5},
|
||||||
|
{"color": "green", "value": 10},
|
||||||
|
],
|
||||||
|
},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
bargauge_panel(
|
||||||
13,
|
10,
|
||||||
"Ariadne Schedule Last Error (hours ago)",
|
"Ariadne Schedule Last Error (hours ago)",
|
||||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS,
|
ARIADNE_SCHEDULE_LAST_ERROR_HOURS,
|
||||||
{"h": 6, "w": 12, "x": 0, "y": 24},
|
{"h": 8, "w": 12, "x": 0, "y": 18},
|
||||||
unit="h",
|
unit="h",
|
||||||
transformations=sort_desc,
|
|
||||||
instant=True,
|
instant=True,
|
||||||
|
legend="{{task}}",
|
||||||
|
thresholds=recent_error_thresholds,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
bargauge_panel(
|
||||||
|
11,
|
||||||
|
"Ariadne Schedule Last Success (hours ago)",
|
||||||
|
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS,
|
||||||
|
{"h": 8, "w": 12, "x": 12, "y": 18},
|
||||||
|
unit="h",
|
||||||
|
instant=True,
|
||||||
|
legend="{{task}}",
|
||||||
|
thresholds=age_thresholds,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
bargauge_panel(
|
||||||
|
12,
|
||||||
|
"Glue Jobs Last Success (hours ago)",
|
||||||
|
GLUE_LAST_SUCCESS_AGE_HOURS,
|
||||||
|
{"h": 8, "w": 12, "x": 0, "y": 26},
|
||||||
|
unit="h",
|
||||||
|
instant=True,
|
||||||
|
legend="{{namespace}}/{{cronjob}}",
|
||||||
|
thresholds=age_thresholds,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
bargauge_panel(
|
||||||
|
13,
|
||||||
|
"Glue Jobs Last Schedule (hours ago)",
|
||||||
|
GLUE_LAST_SCHEDULE_AGE_HOURS,
|
||||||
|
{"h": 8, "w": 12, "x": 12, "y": 26},
|
||||||
|
unit="h",
|
||||||
|
instant=True,
|
||||||
|
legend="{{namespace}}/{{cronjob}}",
|
||||||
|
thresholds=age_thresholds,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
bargauge_panel(
|
||||||
|
14,
|
||||||
|
"Ariadne Access Requests",
|
||||||
|
ARIADNE_ACCESS_REQUESTS,
|
||||||
|
{"h": 6, "w": 8, "x": 0, "y": 34},
|
||||||
|
unit="none",
|
||||||
|
instant=True,
|
||||||
|
legend="{{status}}",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
stat_panel(
|
stat_panel(
|
||||||
10,
|
15,
|
||||||
"Ariadne CI Coverage (%)",
|
"Ariadne CI Coverage (%)",
|
||||||
ARIADNE_CI_COVERAGE,
|
ARIADNE_CI_COVERAGE,
|
||||||
{"h": 4, "w": 6, "x": 0, "y": 30},
|
{"h": 6, "w": 4, "x": 8, "y": 34},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
decimals=1,
|
decimals=1,
|
||||||
instant=True,
|
instant=True,
|
||||||
@ -2308,12 +2380,12 @@ def build_testing_dashboard():
|
|||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
table_panel(
|
table_panel(
|
||||||
11,
|
16,
|
||||||
"Ariadne CI Tests (latest)",
|
"Ariadne CI Tests (latest)",
|
||||||
ARIADNE_CI_TESTS,
|
ARIADNE_CI_TESTS,
|
||||||
{"h": 6, "w": 18, "x": 6, "y": 30},
|
{"h": 6, "w": 12, "x": 12, "y": 34},
|
||||||
unit="none",
|
unit="none",
|
||||||
transformations=sort_desc,
|
transformations=[{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}],
|
||||||
instant=True,
|
instant=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user