diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index 3f816be3..6748a54d 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ b/services/monitoring/grafana-alerting-config.yaml @@ -22,7 +22,24 @@ data: - orgId: 1 receiver: email-admins group_by: + - grafana_folder - alertname + group_wait: 1m + group_interval: 30m + repeat_interval: 12h + routes: + - receiver: email-admins + object_matchers: + - [severity, "=", "critical"] + group_wait: 30s + group_interval: 5m + repeat_interval: 2h + - receiver: email-admins + object_matchers: + - [severity, "=", "warning"] + group_wait: 5m + group_interval: 2h + repeat_interval: 24h rules.yaml: | apiVersion: 1 groups: @@ -501,7 +518,7 @@ data: model: intervalMs: 60000 maxDataPoints: 43200 - expr: postmark_outbound_bounce_rate{window="1d"} + expr: max(postmark_outbound_bounce_rate{window="1d"}) or on() vector(0) legendFormat: bounce 1d datasource: type: prometheus @@ -530,7 +547,7 @@ data: reducer: type: last type: query - noDataState: NoData + noDataState: OK execErrState: Error annotations: summary: "Postmark 1d bounce rate >5%" @@ -549,7 +566,7 @@ data: model: intervalMs: 60000 maxDataPoints: 43200 - expr: min_over_time(max by (instance) (postmark_api_up)[5m]) + expr: max(postmark_api_up) or on() vector(0) legendFormat: api up datasource: type: prometheus @@ -578,7 +595,7 @@ data: reducer: type: last type: query - noDataState: NoData + noDataState: OK execErrState: Error annotations: summary: "Postmark exporter reports API down" diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index ae9b73b5..55655405 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -286,7 +286,7 @@ spec: podAnnotations: vault.hashicorp.com/agent-inject: "true" vault.hashicorp.com/role: "monitoring" - monitoring.bstein.dev/restart-rev: "4" + monitoring.bstein.dev/restart-rev: "5" vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin" vault.hashicorp.com/agent-inject-template-grafana-env.sh: | {{ with secret "kv/data/atlas/monitoring/grafana-admin" }}