monitoring: alert on soteria backup job creation spikes
This commit is contained in:
parent
20305a7181
commit
2221a2d279
@@ -639,6 +639,54 @@ data:
|
|||||||
summary: "Soteria saw >10 authorization denials in 15m"
|
summary: "Soteria saw >10 authorization denials in 15m"
|
||||||
labels:
|
labels:
|
||||||
severity: warning
|
severity: warning
|
||||||
|
# Alert rule: warn when more than 8 "soteria-backup-*" Jobs in the
# "maintenance" namespace were created within the last 10 minutes and the
# condition has held for 5 minutes.
# Pipeline: A (query) -> B (reduce to last value) -> C (threshold > 8).
- uid: maint-soteria-backup-job-storm
  title: "Soteria backup job creation spike"
  condition: C
  for: "5m"
  data:
    - refId: A
      # 600 s lookback, matching the 10-minute window used in the query.
      relativeTimeRange:
        from: 600
        to: 0
      datasourceUid: atlas-vm
      model:
        # kube_job_created is a gauge whose value is the Job's creation Unix
        # timestamp, not a counter, so increase() over it does not yield the
        # number of Jobs created. Count the Job series whose creation
        # timestamp falls inside the last 600 s instead.
        # "or on() vector(0)" keeps the result at 0 when no Job matches.
        expr: count(kube_job_created{namespace="maintenance",job_name=~"soteria-backup-.*"} > (time() - 600)) or on() vector(0)
        intervalMs: 60000
        maxDataPoints: 43200
        legendFormat: soteria-backup-jobs-created-10m
        datasource:
          type: prometheus
          uid: atlas-vm
    - refId: B
      # Reduce query A to its most recent value.
      datasourceUid: __expr__
      model:
        expression: A
        intervalMs: 60000
        maxDataPoints: 43200
        reducer: last
        type: reduce
    - refId: C
      # Fire when the reduced value B is greater than 8.
      datasourceUid: __expr__
      model:
        expression: B
        intervalMs: 60000
        maxDataPoints: 43200
        type: threshold
        conditions:
          - evaluator:
              params: [8]
              type: gt
            operator:
              type: and
            reducer:
              type: last
            type: query
  # Missing data is treated as healthy; a failing query raises the alert.
  noDataState: OK
  execErrState: Alerting
  annotations:
    summary: "Soteria created >8 backup jobs in 10m (possible scheduler storm)"
  labels:
    severity: warning
|
||||||
- orgId: 1
|
- orgId: 1
|
||||||
name: ariadne
|
name: ariadne
|
||||||
folder: Alerts
|
folder: Alerts
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user