monitoring: alert on soteria backup job creation spikes

This commit is contained in:
Brad Stein 2026-04-17 01:09:25 -03:00
parent 20305a7181
commit 2221a2d279

View File

@@ -639,6 +639,54 @@ data:
summary: "Soteria saw >10 authorization denials in 15m"
labels:
severity: warning
# Warns when more than 8 `soteria-backup-*` Jobs in the `maintenance`
# namespace were created within the last 10 minutes, and that condition has
# held for 5 minutes.
- uid: maint-soteria-backup-job-storm
  title: "Soteria backup job creation spike"
  condition: C
  for: "5m"
  data:
    # A: how many matching Jobs have a creation timestamp newer than 10m ago.
    - refId: A
      relativeTimeRange:
        from: 600
        to: 0
      datasourceUid: atlas-vm
      model:
        # kube_job_created (kube-state-metrics) is a gauge whose value is the
        # Job's Unix creation timestamp; it is constant per Job, so
        # increase() over it is always 0 and would never trip the threshold.
        # Instead, keep only the series whose timestamp falls inside the last
        # 600s and count them. `or on() vector(0)` yields 0 rather than an
        # empty result when no Job matches.
        expr: count(kube_job_created{namespace="maintenance",job_name=~"soteria-backup-.*"} > (time() - 600)) or on() vector(0)
        intervalMs: 60000
        maxDataPoints: 43200
        legendFormat: soteria-backup-jobs-created-10m
        datasource:
          type: prometheus
          uid: atlas-vm
    # B: reduce query A to its most recent value.
    - refId: B
      datasourceUid: __expr__
      model:
        expression: A
        intervalMs: 60000
        maxDataPoints: 43200
        reducer: last
        type: reduce
    # C: alert condition, true when B is strictly greater than 8.
    - refId: C
      datasourceUid: __expr__
      model:
        expression: B
        intervalMs: 60000
        maxDataPoints: 43200
        type: threshold
        conditions:
          - evaluator:
              params: [8]
              type: gt
            operator:
              type: and
            reducer:
              type: last
            type: query
  # Query A always returns a value (vector(0) fallback), so missing data is
  # treated as healthy; query execution errors raise the alert.
  noDataState: OK
  execErrState: Alerting
  annotations:
    summary: "Soteria created >8 backup jobs in 10m (possible scheduler storm)"
  labels:
    severity: warning
- orgId: 1
name: ariadne
folder: Alerts