From fb43b02b2a2750b5cc6d2c389dc6924dbb9f9d87 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 14 Apr 2026 02:14:43 -0300 Subject: [PATCH] monitoring(soteria): tune PVC backup age thresholds for nightly cadence --- scripts/dashboards_render_atlas.py | 12 +++++++----- services/monitoring/dashboards/atlas-overview.json | 8 ++++---- services/monitoring/grafana-dashboard-overview.yaml | 8 ++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 73247092..d97f28ef 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -2338,17 +2338,19 @@ def build_overview(): "mode": "absolute", "steps": [ {"color": "green", "value": None}, - {"color": "yellow", "value": 6}, - {"color": "orange", "value": 12}, - {"color": "red", "value": 24}, + {"color": "yellow", "value": 20}, + {"color": "orange", "value": 40}, + {"color": "red", "value": 50}, ], }, ) ) panels[-1]["links"] = link_to("atlas-storage") panels[-1]["description"] = ( - "Backup age in hours computed from last-success timestamps for restic-managed PVCs. " - "PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." + "Backup age in hours computed from last-success timestamps for restic-managed PVCs " + "(nightly target: <20h green, <40h yellow, <50h orange, >=50h red). " + "PVCs that have backup history but currently no successful backup (missing/no_completed/error) " + "are pinned to 999h for visibility." 
) panels.append( diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index ba78d653..f830e344 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -2539,15 +2539,15 @@ }, { "color": "yellow", - "value": 6 + "value": 20 }, { "color": "orange", - "value": 12 + "value": 40 }, { "color": "red", - "value": 24 + "value": 50 } ] } @@ -2583,7 +2583,7 @@ "targetBlank": true } ], - "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs. PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." + "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <20h green, <40h yellow, <50h orange, >=50h red). PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." }, { "id": 30, diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 626c4823..afd7bfa3 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -2548,15 +2548,15 @@ data: }, { "color": "yellow", - "value": 6 + "value": 20 }, { "color": "orange", - "value": 12 + "value": 40 }, { "color": "red", - "value": 24 + "value": 50 } ] } @@ -2592,7 +2592,7 @@ data: "targetBlank": true } ], - "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs. PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." + "description": "Backup age in hours computed from last-success timestamps for restic-managed PVCs (nightly target: <20h green, <40h yellow, <50h orange, >=50h red). 
PVCs that have backup history but currently no successful backup (missing/no_completed/error) are pinned to 999h for visibility." }, { "id": 30,