Compare commits
8 Commits
cb27592272
...
de3272e160
| Author | SHA1 | Date | |
|---|---|---|---|
| de3272e160 | |||
| 8a413c0024 | |||
| aa24e08744 | |||
| b6b1e533ed | |||
| 4864939eef | |||
| 01ecb75c5b | |||
| fa30ea0ac2 | |||
| 2509d8876a |
@ -415,13 +415,25 @@ ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="err
|
||||
ARIADNE_TASK_WARNINGS_SERIES = (
|
||||
'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
|
||||
ARIADNE_SCHEDULE_TASK_FILTER = 'task=~"^schedule\\..+$"'
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = (
|
||||
f"(time() - ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = (
|
||||
f"(time() - ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600"
|
||||
f"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
|
||||
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
|
||||
f"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range])) / 3600"
|
||||
)
|
||||
ARIADNE_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_TASK_FILTER}}}"
|
||||
ARIADNE_SCHEDULE_RUNS_RANGE = (
|
||||
f'sum by (task) (increase(ariadne_task_runs_total{{{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))'
|
||||
)
|
||||
ARIADNE_SCHEDULE_ERRORS_RANGE = (
|
||||
f'sum by (task) (increase(ariadne_task_runs_total{{status="error",{ARIADNE_SCHEDULE_TASK_FILTER}}}[$__range]))'
|
||||
)
|
||||
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
|
||||
PLATFORM_TEST_SUITE_NAMES = [
|
||||
@ -2889,6 +2901,14 @@ def build_jobs_dashboard():
|
||||
{"color": "red", "value": 5},
|
||||
],
|
||||
}
|
||||
schedule_status_thresholds = {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{"color": "red", "value": None},
|
||||
{"color": "yellow", "value": 0.5},
|
||||
{"color": "green", "value": 1},
|
||||
],
|
||||
}
|
||||
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
@ -3029,12 +3049,13 @@ def build_jobs_dashboard():
|
||||
panels.append(
|
||||
bargauge_panel(
|
||||
11,
|
||||
"Ariadne Schedule Last Success (hours ago)",
|
||||
"Ariadne Schedule Last Success (hours ago, newest first)",
|
||||
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
|
||||
{"h": 6, "w": 12, "x": 12, "y": 17},
|
||||
unit="h",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=age_thresholds,
|
||||
decimals=2,
|
||||
)
|
||||
@ -3149,6 +3170,53 @@ def build_jobs_dashboard():
|
||||
"Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
|
||||
)
|
||||
panels.append(suite_panel)
|
||||
status_panel = bargauge_panel(
|
||||
20,
|
||||
"Ariadne Schedule Last Status",
|
||||
ARIADNE_SCHEDULE_LAST_STATUS,
|
||||
{"h": 8, "w": 8, "x": 0, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
sort_order="asc",
|
||||
thresholds=schedule_status_thresholds,
|
||||
decimals=0,
|
||||
)
|
||||
status_panel["description"] = "1 means the last run was ok. 0 means the last run ended in error."
|
||||
status_panel["fieldConfig"]["defaults"]["mappings"] = [
|
||||
{
|
||||
"type": "value",
|
||||
"options": {
|
||||
"0": {"text": "error"},
|
||||
"1": {"text": "ok"},
|
||||
},
|
||||
}
|
||||
]
|
||||
panels.append(status_panel)
|
||||
schedule_runs_panel = bargauge_panel(
|
||||
21,
|
||||
"Ariadne Schedule Runs (range)",
|
||||
ARIADNE_SCHEDULE_RUNS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 8, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds={"mode": "absolute", "steps": [{"color": "green", "value": None}]},
|
||||
)
|
||||
schedule_runs_panel["description"] = "Number of runs by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_runs_panel)
|
||||
schedule_errors_panel = bargauge_panel(
|
||||
22,
|
||||
"Ariadne Schedule Errors (range)",
|
||||
ARIADNE_SCHEDULE_ERRORS_RANGE,
|
||||
{"h": 8, "w": 8, "x": 16, "y": 35},
|
||||
unit="none",
|
||||
instant=True,
|
||||
legend="{{task}}",
|
||||
thresholds=task_error_thresholds,
|
||||
)
|
||||
schedule_errors_panel["description"] = "Error run count by schedule task over the selected dashboard time range."
|
||||
panels.append(schedule_errors_panel)
|
||||
|
||||
return {
|
||||
"uid": "atlas-jobs",
|
||||
|
||||
73
scripts/verify_jenkins_workspace_cleanup_rollout.sh
Executable file
73
scripts/verify_jenkins_workspace_cleanup_rollout.sh
Executable file
@ -0,0 +1,73 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
MODE="${1:-dry-run}"
|
||||
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
|
||||
echo "usage: $0 [dry-run|active]" >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
EXPECTED_DRY_RUN="true"
|
||||
PROM_MODE="dry_run"
|
||||
if [[ "$MODE" == "active" ]]; then
|
||||
EXPECTED_DRY_RUN="false"
|
||||
PROM_MODE="delete"
|
||||
fi
|
||||
|
||||
KUSTOMIZATION="${KUSTOMIZATION:-maintenance}"
|
||||
NAMESPACE="${NAMESPACE:-maintenance}"
|
||||
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
|
||||
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"
|
||||
|
||||
for cmd in flux kubectl curl grep awk; do
|
||||
if ! command -v "$cmd" >/dev/null 2>&1; then
|
||||
echo "missing required command: $cmd" >&2
|
||||
exit 2
|
||||
fi
|
||||
done
|
||||
|
||||
echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
|
||||
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source
|
||||
|
||||
echo "[2/5] wait for deployment rollout"
|
||||
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m
|
||||
|
||||
echo "[3/5] verify ariadne env wiring"
|
||||
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"
|
||||
echo "$ENV_DUMP" | grep -F "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
|
||||
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_NAMESPACE=jenkins"
|
||||
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
|
||||
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
|
||||
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
|
||||
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"
|
||||
|
||||
echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
|
||||
PF_LOG="$(mktemp)"
|
||||
METRICS_FILE="$(mktemp)"
|
||||
cleanup() {
|
||||
if [[ -n "${PF_PID:-}" ]]; then
|
||||
kill "$PF_PID" >/dev/null 2>&1 || true
|
||||
wait "$PF_PID" 2>/dev/null || true
|
||||
fi
|
||||
rm -f "$PF_LOG" "$METRICS_FILE"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" "${LOCAL_METRICS_PORT}:8080" >"$PF_LOG" 2>&1 &
|
||||
PF_PID=$!
|
||||
sleep 2
|
||||
curl -fsS "http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics" >"$METRICS_FILE"
|
||||
grep -F "# HELP ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"
|
||||
grep -F "# HELP ariadne_jenkins_workspace_cleanup_objects_total" "$METRICS_FILE"
|
||||
|
||||
echo "[5/5] show recent cleanup signal"
|
||||
if grep -q "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"; then
|
||||
grep "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE" | grep "mode=\"${PROM_MODE}\"" || true
|
||||
else
|
||||
echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
|
||||
fi
|
||||
|
||||
echo "Recent cleanup logs (if any):"
|
||||
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 | grep -i "jenkins workspace cleanup" | tail -n 20 || true
|
||||
|
||||
echo "verification complete for mode=${MODE}"
|
||||
@ -352,7 +352,11 @@ spec:
|
||||
- name: JENKINS_WORKSPACE_PVC_PREFIX
|
||||
value: pvc-workspace-
|
||||
- name: JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS
|
||||
value: "12"
|
||||
value: "24"
|
||||
- name: JENKINS_WORKSPACE_CLEANUP_DRY_RUN
|
||||
value: "false"
|
||||
- name: JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN
|
||||
value: "20"
|
||||
- name: METRICS_PATH
|
||||
value: "/metrics"
|
||||
resources:
|
||||
|
||||
@ -616,7 +616,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -678,7 +678,7 @@
|
||||
{
|
||||
"id": 11,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Last Success (hours ago)",
|
||||
"title": "Ariadne Schedule Last Success (hours ago, newest first)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -691,7 +691,7 @@
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
|
||||
"expr": "sort((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -745,7 +745,7 @@
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -1268,37 +1268,37 @@
|
||||
},
|
||||
{
|
||||
"refId": "D",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"legendFormat": "atlasbot"
|
||||
},
|
||||
{
|
||||
"refId": "E",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"legendFormat": "lesavka"
|
||||
},
|
||||
{
|
||||
"refId": "F",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"legendFormat": "pegasus"
|
||||
},
|
||||
{
|
||||
"refId": "G",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
|
||||
"legendFormat": "soteria"
|
||||
},
|
||||
{
|
||||
"refId": "H",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"legendFormat": "titan-iac"
|
||||
},
|
||||
{
|
||||
"refId": "I",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"legendFormat": "bstein-home"
|
||||
},
|
||||
{
|
||||
"refId": "J",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"legendFormat": "arcanagon"
|
||||
},
|
||||
{
|
||||
@ -1334,6 +1334,229 @@
|
||||
}
|
||||
},
|
||||
"description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Last Status",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0.5
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [
|
||||
{
|
||||
"type": "value",
|
||||
"options": {
|
||||
"0": {
|
||||
"text": "error"
|
||||
},
|
||||
"1": {
|
||||
"text": "ok"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "1 means the last run was ok. 0 means the last run ended in error."
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Runs (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Number of runs by schedule task over the selected dashboard time range."
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Errors (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Error run count by schedule task over the selected dashboard time range."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
|
||||
@ -625,7 +625,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -687,7 +687,7 @@ data:
|
||||
{
|
||||
"id": 11,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Last Success (hours ago)",
|
||||
"title": "Ariadne Schedule Last Success (hours ago, newest first)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
@ -700,7 +700,7 @@ data:
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
|
||||
"expr": "sort((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^schedule\\..+$\"}[$__range])) / 3600)",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
@ -754,7 +754,7 @@ data:
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
]
|
||||
@ -1277,37 +1277,37 @@ data:
|
||||
},
|
||||
{
|
||||
"refId": "D",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
|
||||
"legendFormat": "atlasbot"
|
||||
},
|
||||
{
|
||||
"refId": "E",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
|
||||
"legendFormat": "lesavka"
|
||||
},
|
||||
{
|
||||
"refId": "F",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
|
||||
"legendFormat": "pegasus"
|
||||
},
|
||||
{
|
||||
"refId": "G",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"soteria\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
|
||||
"legendFormat": "soteria"
|
||||
},
|
||||
{
|
||||
"refId": "H",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
|
||||
"legendFormat": "titan-iac"
|
||||
},
|
||||
{
|
||||
"refId": "I",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
|
||||
"legendFormat": "bstein-home"
|
||||
},
|
||||
{
|
||||
"refId": "J",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{exported_job=\"platform-quality-ci\",suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
|
||||
"legendFormat": "arcanagon"
|
||||
},
|
||||
{
|
||||
@ -1343,6 +1343,229 @@ data:
|
||||
}
|
||||
},
|
||||
"description": "Per-run interval pass points (0-100) per suite. Existing suites: ariadne, metis, ananke; additional suites appear automatically when platform_quality_gate_runs_total is published."
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Last Status",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 0,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort(ariadne_schedule_last_status{task=~\"^schedule\\..+$\"})",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "red",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 0.5
|
||||
},
|
||||
{
|
||||
"color": "green",
|
||||
"value": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [
|
||||
{
|
||||
"type": "value",
|
||||
"options": {
|
||||
"0": {
|
||||
"text": "error"
|
||||
},
|
||||
"1": {
|
||||
"text": "ok"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "asc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "1 means the last run was ok. 0 means the last run ended in error."
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Runs (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 8,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{task=~\"^schedule\\..+$\"}[$__range])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Number of runs by schedule task over the selected dashboard time range."
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"type": "bargauge",
|
||||
"title": "Ariadne Schedule Errors (range)",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "atlas-vm"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 8,
|
||||
"x": 16,
|
||||
"y": 35
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sort_desc(sum by (task) (increase(ariadne_task_runs_total{status=\"error\",task=~\"^schedule\\..+$\"}[$__range])))",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{task}}",
|
||||
"instant": true
|
||||
}
|
||||
],
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "none",
|
||||
"min": 0,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "yellow",
|
||||
"value": 1
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 3
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 5
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"options": {
|
||||
"displayMode": "gradient",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
}
|
||||
},
|
||||
"transformations": [
|
||||
{
|
||||
"id": "sortBy",
|
||||
"options": {
|
||||
"fields": [
|
||||
"Value"
|
||||
],
|
||||
"order": "desc"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Error run count by schedule task over the selected dashboard time range."
|
||||
}
|
||||
],
|
||||
"time": {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user