From a01dc0813a68e9ec542cf4e76a3cd590b84597a6 Mon Sep 17 00:00:00 2001
From: Brad Stein
Date: Sun, 12 Apr 2026 19:47:58 -0300
Subject: [PATCH] maintenance(soteria): enable b2 usage scan config and alert

---
Review notes (kept in the free-text area ahead of the diffstat):

* Layout: this patch was recovered from a copy whose newlines and indentation
  had been collapsed. Indentation of the hunk lines below is reconstructed
  from the YAML nesting and must be verified against the target files before
  applying.
* Purpose: turns on the Soteria B2 usage scan through ConfigMap keys (scan
  every 900s, 120s timeout, credential field names read from the
  soteria-restic secret in the maintenance namespace) and adds a Grafana
  rule that alerts when that scan is failing or stale.
* Query fix: the rule originally evaluated
    sum((((soteria_b2_scan_success < bool 1) and (AGE > 600)) or (AGE > 1800)))
  where AGE is time() - soteria_b2_scan_timestamp_seconds. With the bool
  modifier the comparison returns every series as 0/1 rather than filtering,
  so a scan whose last result was success but is older than 600s stays on
  the left side of the set "or" with value 0. That suppresses the AGE > 1800
  operand for the same label set, the sum is 0, and the stale half of the
  alert cannot fire. The query now filters with a plain "< 1" and uses
  count(), so it yields the number of unhealthy series regardless of sample
  values.
* NOTE(review): the scan interval is 900s while the failing branch requires
  AGE > 600 together with "for: 15m". If the timestamp metric is refreshed on
  every scan attempt (including failed ones), that branch is true for only
  part of each cycle and may never stay pending for 15m. Confirm when
  soteria_b2_scan_timestamp_seconds is updated.

 services/maintenance/soteria-configmap.yaml   |  8 ++++
 .../monitoring/grafana-alerting-config.yaml   | 48 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/services/maintenance/soteria-configmap.yaml b/services/maintenance/soteria-configmap.yaml
index fdf666e9..e2085eca 100644
--- a/services/maintenance/soteria-configmap.yaml
+++ b/services/maintenance/soteria-configmap.yaml
@@ -12,3 +12,11 @@ data:
   SOTERIA_ALLOWED_GROUPS: admin,maintenance
   SOTERIA_BACKUP_MAX_AGE_HOURS: "24"
   SOTERIA_METRICS_REFRESH_SECONDS: "300"
+  SOTERIA_B2_ENABLED: "true"
+  SOTERIA_B2_SECRET_NAMESPACE: maintenance
+  SOTERIA_B2_SECRET_NAME: soteria-restic
+  SOTERIA_B2_ACCESS_KEY_FIELD: AWS_ACCESS_KEY_ID
+  SOTERIA_B2_SECRET_KEY_FIELD: AWS_SECRET_ACCESS_KEY
+  SOTERIA_B2_ENDPOINT_FIELD: AWS_ENDPOINTS
+  SOTERIA_B2_SCAN_INTERVAL_SECONDS: "900"
+  SOTERIA_B2_SCAN_TIMEOUT_SECONDS: "120"
diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml
index be765f73..5518b2b5 100644
--- a/services/monitoring/grafana-alerting-config.yaml
+++ b/services/monitoring/grafana-alerting-config.yaml
@@ -543,6 +543,54 @@ data:
               summary: "One or more PVCs are stale, missing, or failed per Soteria backup health"
             labels:
               severity: warning
+          - uid: maint-soteria-b2-scan-unhealthy
+            title: "Soteria B2 usage scan failing or stale"
+            condition: C
+            for: "15m"
+            data:
+              - refId: A
+                relativeTimeRange:
+                  from: 1800
+                  to: 0
+                datasourceUid: atlas-vm
+                model:
+                  expr: count(((soteria_b2_scan_success < 1) and (time() - soteria_b2_scan_timestamp_seconds > 600)) or (time() - soteria_b2_scan_timestamp_seconds > 1800)) or on() vector(0)
+                  intervalMs: 60000
+                  maxDataPoints: 43200
+                  legendFormat: soteria-b2-scan-unhealthy
+                  datasource:
+                    type: prometheus
+                    uid: atlas-vm
+              - refId: B
+                datasourceUid: __expr__
+                model:
+                  expression: A
+                  intervalMs: 60000
+                  maxDataPoints: 43200
+                  reducer: last
+                  type: reduce
+              - refId: C
+                datasourceUid: __expr__
+                model:
+                  expression: B
+                  intervalMs: 60000
+                  maxDataPoints: 43200
+                  type: threshold
+                  conditions:
+                    - evaluator:
+                        params: [0]
+                        type: gt
+                      operator:
+                        type: and
+                      reducer:
+                        type: last
+                      type: query
+            noDataState: OK
+            execErrState: Alerting
+            annotations:
+              summary: "Soteria B2 consumption scan is failing or stale for >15m"
+            labels:
+              severity: warning
           - uid: maint-soteria-authz-denials
             title: "Soteria authorization denials elevated"
             condition: C