monitoring(jobs): add schedule fallback series for cold starts

This commit is contained in:
Brad Stein 2026-04-12 20:09:43 -03:00
parent a1257b65ff
commit e222344cd9
3 changed files with 38 additions and 15 deletions

View File

@ -431,6 +431,7 @@ ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
# Selector for the next-run timestamp series of every task matched by the
# schedule task filter. Other queries reuse it as the per-task series to build on.
ARIADNE_SCHEDULE_TASK_INDEX = (
    f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
)
# Hours between now and each task's next-run timestamp (seconds / 3600).
ARIADNE_SCHEDULE_NEXT_RUN_HOURS = f"(({ARIADNE_SCHEDULE_TASK_INDEX} - time()) / 3600)"
# Raw last-status gauge for every task matched by the schedule task filter.
ARIADNE_SCHEDULE_LAST_STATUS = (
    f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
)
ARIADNE_SCHEDULE_SIGNAL_COUNT = (
f"count(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) or on() vector(0)"
@ -453,6 +454,21 @@ ARIADNE_SCHEDULE_RUNS_RANGE = (
# Per-task increase of error-status task runs over the `$__range` window.
ARIADNE_SCHEDULE_ERRORS_RANGE = (
    "sum by (task) ("
    f'increase(ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])'
    ")"
)
# Fallback variants of the schedule queries.
#
# Each expression is "<primary> or on(task) <placeholder>". The placeholder is
# derived from the next-run index series: `0 * index` zeroes the value while
# keeping one sample per task, and the trailing constant sets the sentinel.
# `or on(task)` only adds a placeholder for tasks that have no sample in the
# primary query, so every task with a next-run series still shows up.
#
# Sentinels: 999 (hours) for the "last success" / "last error" ages, -1 for the
# last status, and 0 for the run / error counters.
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS_FALLBACK = (
    f"({ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS})"
    f" or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} + 999)"
)
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS_FALLBACK = (
    f"({ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS})"
    f" or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} + 999)"
)
ARIADNE_SCHEDULE_LAST_STATUS_FALLBACK = (
    f"({ARIADNE_SCHEDULE_LAST_STATUS})"
    f" or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX} - 1)"
)
ARIADNE_SCHEDULE_RUNS_RANGE_FALLBACK = (
    f"({ARIADNE_SCHEDULE_RUNS_RANGE})"
    f" or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX})"
)
ARIADNE_SCHEDULE_ERRORS_RANGE_FALLBACK = (
    f"({ARIADNE_SCHEDULE_ERRORS_RANGE})"
    f" or on(task) (0 * {ARIADNE_SCHEDULE_TASK_INDEX})"
)
# Count of Jenkins workspace-cleanup "last run" timestamp series. The
# `or on() vector(0)` tail supplies a literal 0 when the count yields no sample.
JENKINS_CLEANUP_SIGNAL_COUNT = (
    "count(ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds)"
    " or on() vector(0)"
)
@ -3072,7 +3088,7 @@ def build_jobs_dashboard():
schedule_list_panel = table_panel(
7,
"Ariadne Schedules: Last Success (h, newest first)",
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS_FALLBACK,
{"h": 8, "w": 12, "x": 0, "y": 4},
unit="h",
transformations=[
@ -3087,7 +3103,7 @@ def build_jobs_dashboard():
bargauge_panel(
8,
"Ariadne Schedule Last Error (hours ago)",
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS,
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS_FALLBACK,
{"h": 8, "w": 12, "x": 12, "y": 4},
unit="h",
instant=True,
@ -3100,7 +3116,7 @@ def build_jobs_dashboard():
status_panel = bargauge_panel(
9,
"Ariadne Schedule Last Status",
ARIADNE_SCHEDULE_LAST_STATUS,
ARIADNE_SCHEDULE_LAST_STATUS_FALLBACK,
{"h": 8, "w": 8, "x": 0, "y": 12},
unit="none",
instant=True,
@ -3114,6 +3130,7 @@ def build_jobs_dashboard():
{
"type": "value",
"options": {
"-1": {"text": "pending"},
"0": {"text": "error"},
"1": {"text": "ok"},
},
@ -3123,7 +3140,7 @@ def build_jobs_dashboard():
schedule_runs_panel = bargauge_panel(
10,
"Ariadne Schedule Runs (range)",
ARIADNE_SCHEDULE_RUNS_RANGE,
ARIADNE_SCHEDULE_RUNS_RANGE_FALLBACK,
{"h": 8, "w": 8, "x": 8, "y": 12},
unit="none",
instant=True,
@ -3135,7 +3152,7 @@ def build_jobs_dashboard():
schedule_errors_panel = bargauge_panel(
11,
"Ariadne Schedule Errors (range)",
ARIADNE_SCHEDULE_ERRORS_RANGE,
ARIADNE_SCHEDULE_ERRORS_RANGE_FALLBACK,
{"h": 8, "w": 8, "x": 16, "y": 12},
unit="none",
instant=True,

View File

@ -421,7 +421,7 @@
},
"targets": [
{
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600",
"expr": "((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999)",
"refId": "A",
"instant": true
}
@ -472,7 +472,7 @@
},
"targets": [
{
"expr": "sort((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
"expr": "sort(((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -547,7 +547,7 @@
},
"targets": [
{
"expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})",
"expr": "sort((ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} - 1))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -580,6 +580,9 @@
{
"type": "value",
"options": {
"-1": {
"text": "pending"
},
"0": {
"text": "error"
},
@ -632,7 +635,7 @@
},
"targets": [
{
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))",
"expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -695,7 +698,7 @@
},
"targets": [
{
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))",
"expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true

View File

@ -430,7 +430,7 @@ data:
},
"targets": [
{
"expr": "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600",
"expr": "((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999)",
"refId": "A",
"instant": true
}
@ -481,7 +481,7 @@ data:
},
"targets": [
{
"expr": "sort((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
"expr": "sort(((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} + 999))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -556,7 +556,7 @@ data:
},
"targets": [
{
"expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})",
"expr": "sort((ariadne_schedule_last_status{task=~\"^schedule\\..+$\"}) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"} - 1))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -589,6 +589,9 @@ data:
{
"type": "value",
"options": {
"-1": {
"text": "pending"
},
"0": {
"text": "error"
},
@ -641,7 +644,7 @@ data:
},
"targets": [
{
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))",
"expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -704,7 +707,7 @@ data:
},
"targets": [
{
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))",
"expr": "sort_desc((sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range]))) or on(task) (0 * ariadne_schedule_next_run_timestamp_seconds{task=~\"^schedule\\..+$\"}))",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true