From d5d2fc66b95875d75f6495f64cc602639425888c Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Tue, 6 Jan 2026 02:06:20 -0300 Subject: [PATCH] monitoring: refine mail stats and add send-limit usage --- scripts/dashboards_render_atlas.py | 117 +++++-- scripts/monitoring_postmark_exporter.py | 29 ++ .../monitoring/dashboards/atlas-mail.json | 300 ++++++++++++++++-- .../monitoring/dashboards/atlas-overview.json | 28 +- .../monitoring/grafana-dashboard-mail.yaml | 300 ++++++++++++++++-- .../grafana-dashboard-overview.yaml | 28 +- .../postmark-exporter-deployment.yaml | 8 + .../monitoring/postmark-exporter-script.yaml | 29 ++ 8 files changed, 741 insertions(+), 98 deletions(-) diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index ea2330c..3cc9cb7 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -935,6 +935,15 @@ def build_overview(): {"color": "red", "value": 100}, ], } + mail_limit_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 70}, + {"color": "orange", "value": 85}, + {"color": "red", "value": 95}, + ], + } mail_api_thresholds = { "mode": "absolute", "steps": [ @@ -946,28 +955,28 @@ def build_overview(): ( 30, "Mail Bounce Rate (1d)", - 'postmark_outbound_bounce_rate{window="1d"}', + 'max(postmark_outbound_bounce_rate{window="1d"})', "percent", mail_bounce_rate_thresholds, ), ( 31, "Mail Bounced (1d)", - 'postmark_outbound_bounced{window="1d"}', + 'max(postmark_outbound_bounced{window="1d"})', "none", mail_bounce_count_thresholds, ), ( 32, - "Mail Sent (1d)", - 'postmark_outbound_sent{window="1d"}', - "none", - None, + "Mail Limit Used (30d)", + "max(postmark_sending_limit_used_percent)", + "percent", + mail_limit_thresholds, ), ( 33, "Postmark API Up", - "postmark_api_up", + "max(postmark_api_up)", "none", mail_api_thresholds, ), @@ -1875,33 +1884,42 @@ def build_mail_dashboard(): {"color": "green", "value": 1}, ], } + limit_thresholds = { + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 70}, + {"color": "orange", "value": 85}, + {"color": "red", "value": 95}, + ], + } current_stats = [ ( 1, "Bounce Rate (1d)", - 'postmark_outbound_bounce_rate{window="1d"}', + 'max(postmark_outbound_bounce_rate{window="1d"})', "percent", bounce_rate_thresholds, ), ( 2, "Bounce Rate (7d)", - 'postmark_outbound_bounce_rate{window="7d"}', + 'max(postmark_outbound_bounce_rate{window="7d"})', "percent", bounce_rate_thresholds, ), ( 3, "Bounced (1d)", - 'postmark_outbound_bounced{window="1d"}', + 'max(postmark_outbound_bounced{window="1d"})', "none", bounce_count_thresholds, ), ( 4, "Bounced (7d)", - 'postmark_outbound_bounced{window="7d"}', + 'max(postmark_outbound_bounced{window="7d"})', "none", bounce_count_thresholds, ), @@ -1923,7 +1941,7 @@ def build_mail_dashboard(): stat_panel( 5, "Sent (1d)", - 'postmark_outbound_sent{window="1d"}', + 'max(postmark_outbound_sent{window="1d"})', {"h": 4, "w": 6, "x": 0, "y": 4}, decimals=0, ) @@ -1932,7 +1950,7 @@ def build_mail_dashboard(): stat_panel( 6, "Sent (7d)", - 'postmark_outbound_sent{window="7d"}', + 'max(postmark_outbound_sent{window="7d"})', {"h": 4, "w": 6, "x": 6, "y": 4}, decimals=0, ) @@ -1940,30 +1958,69 @@ def build_mail_dashboard(): panels.append( stat_panel( 7, - "Postmark API Up", - "postmark_api_up", + "Limit Used (30d)", + "max(postmark_sending_limit_used_percent)", {"h": 4, "w": 6, "x": 12, "y": 4}, + thresholds=limit_thresholds, + unit="percent", + decimals=1, + ) + ) + panels.append( + stat_panel( + 8, + "Send Limit (30d)", + "max(postmark_sending_limit)", + {"h": 4, "w": 6, "x": 18, "y": 4}, + decimals=0, + ) + ) + + panels.append( + stat_panel( + 9, + "Postmark API Up", + "max(postmark_api_up)", + {"h": 4, "w": 6, "x": 0, "y": 8}, thresholds=api_thresholds, decimals=0, ) ) panels.append( stat_panel( - 8, + 10, "Last Success", - "postmark_last_success_timestamp_seconds", - {"h": 4, "w": 6, "x": 18, "y": 4}, + "max(postmark_last_success_timestamp_seconds)", + {"h": 4, "w": 6, "x": 6, "y": 8}, unit="dateTimeAsIso", decimals=0, ) ) + panels.append( + stat_panel( + 11, + "Exporter Errors", + "sum(postmark_request_errors_total)", + {"h": 4, "w": 6, "x": 12, "y": 8}, + decimals=0, + ) + ) + panels.append( + stat_panel( + 12, + "Limit Used (30d)", + "max(postmark_sending_limit_used)", + {"h": 4, "w": 6, "x": 18, "y": 8}, + decimals=0, + ) + ) panels.append( timeseries_panel( - 9, + 13, "Bounce Rate (1d vs 7d)", - "postmark_outbound_bounce_rate", - {"h": 8, "w": 12, "x": 0, "y": 8}, + "max by (window) (postmark_outbound_bounce_rate)", + {"h": 8, "w": 12, "x": 0, "y": 12}, unit="percent", legend="{{window}}", legend_display="table", @@ -1972,10 +2029,10 @@ def build_mail_dashboard(): ) panels.append( timeseries_panel( - 10, + 14, "Bounced (1d vs 7d)", - "postmark_outbound_bounced", - {"h": 8, "w": 12, "x": 12, "y": 8}, + "max by (window) (postmark_outbound_bounced)", + {"h": 8, "w": 12, "x": 12, "y": 12}, unit="none", legend="{{window}}", legend_display="table", @@ -1984,10 +2041,10 @@ def build_mail_dashboard(): ) panels.append( timeseries_panel( - 11, + 15, "Sent (1d vs 7d)", - "postmark_outbound_sent", - {"h": 8, "w": 12, "x": 0, "y": 16}, + "max by (window) (postmark_outbound_sent)", + {"h": 8, "w": 12, "x": 0, "y": 20}, unit="none", legend="{{window}}", legend_display="table", @@ -1996,10 +2053,10 @@ def build_mail_dashboard(): ) panels.append( timeseries_panel( - 12, + 16, "Exporter Errors", - "postmark_request_errors_total", - {"h": 8, "w": 12, "x": 12, "y": 16}, + "sum(postmark_request_errors_total)", + {"h": 8, "w": 12, "x": 12, "y": 20}, unit="none", ) ) diff --git a/scripts/monitoring_postmark_exporter.py b/scripts/monitoring_postmark_exporter.py index ecae9f3..2a51a54 100644 --- a/scripts/monitoring_postmark_exporter.py +++ b/scripts/monitoring_postmark_exporter.py @@ -19,6 +19,7 @@ WINDOWS = [ Window("today", 0), Window("1d", 1), Window("7d", 7), + Window("30d", 30), ] API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") @@ -28,6 +29,12 @@ LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000")) PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() +LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip() +LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip() +try: + SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0 +except ValueError: + SENDING_LIMIT = 0.0 EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") EXPORTER_INFO.info( @@ -62,6 +69,18 @@ POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge( "Outbound bounce rate percentage within the selected window", labelnames=("window",), ) +POSTMARK_SENDING_LIMIT_GAUGE = Gauge( + "postmark_sending_limit", + "Configured Postmark sending limit for the active account", +) +POSTMARK_SENDING_LIMIT_USED = Gauge( + "postmark_sending_limit_used", + "Messages sent within the configured send limit window", +) +POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge( + "postmark_sending_limit_used_percent", + "Percent of the configured send limit used within the limit window", +) def fetch_outbound_stats(token: str, window: Window) -> dict: @@ -83,15 +102,25 @@ def fetch_outbound_stats(token: str, window: Window) -> dict: def update_metrics(token: str) -> None: + sent_by_window = {} for window in WINDOWS: data = fetch_outbound_stats(token, window) sent = int(data.get("Sent", 0) or 0) bounced = int(data.get("Bounced", 0) or 0) rate = (bounced / sent * 100.0) if sent else 0.0 + sent_by_window[window.label] = sent POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) + limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) + POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) + if SENDING_LIMIT: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) + else: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) + def main() -> None: if not PRIMARY_TOKEN and not FALLBACK_TOKEN: diff --git a/services/monitoring/dashboards/atlas-mail.json b/services/monitoring/dashboards/atlas-mail.json index 1e73d76..a0b733d 100644 --- a/services/monitoring/dashboards/atlas-mail.json +++ b/services/monitoring/dashboards/atlas-mail.json @@ -20,7 +20,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -89,7 +89,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", "refId": "A" } ], @@ -158,7 +158,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -227,7 +227,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"7d\"}", + "expr": "max(postmark_outbound_bounced{window=\"7d\"})", "refId": "A" } ], @@ -296,7 +296,7 @@ }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -357,7 +357,7 @@ }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"7d\"}", + "expr": "max(postmark_outbound_sent{window=\"7d\"})", "refId": "A" } ], @@ -405,7 +405,7 @@ { "id": 7, "type": "stat", - "title": "Postmark API Up", + "title": "Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -418,7 +418,137 @@ }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_sending_limit_used_percent)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Send Limit (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_api_up)", "refId": "A" } ], @@ -464,7 +594,7 @@ } }, { - "id": 8, + "id": 10, "type": "stat", "title": "Last Success", "datasource": { @@ -474,12 +604,12 @@ "gridPos": { "h": 4, "w": 6, - "x": 18, - "y": 4 + "x": 6, + "y": 8 }, "targets": [ { - "expr": "postmark_last_success_timestamp_seconds", + "expr": "max(postmark_last_success_timestamp_seconds)", "refId": "A" } ], @@ -525,7 +655,129 @@ } }, { - "id": 9, + "id": 11, + "type": "stat", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "sum(postmark_request_errors_total)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 12, + "type": "stat", + "title": "Limit Used (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit_used)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 13, "type": "timeseries", "title": "Bounce Rate (1d vs 7d)", "datasource": { @@ -536,11 +788,11 @@ "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounce_rate", + "expr": "max by (window) (postmark_outbound_bounce_rate)", "refId": "A", "legendFormat": "{{window}}" } @@ -562,7 +814,7 @@ } }, { - "id": 10, + "id": 14, "type": "timeseries", "title": "Bounced (1d vs 7d)", "datasource": { @@ -573,11 +825,11 @@ "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounced", + "expr": "max by (window) (postmark_outbound_bounced)", "refId": "A", "legendFormat": "{{window}}" } @@ -599,7 +851,7 @@ } }, { - "id": 11, + "id": 15, "type": "timeseries", "title": "Sent (1d vs 7d)", "datasource": { @@ -610,11 +862,11 @@ "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_outbound_sent", + "expr": "max by (window) (postmark_outbound_sent)", "refId": "A", "legendFormat": "{{window}}" } @@ -636,7 +888,7 @@ } }, { - "id": 12, + "id": 16, "type": "timeseries", "title": "Exporter Errors", "datasource": { @@ -647,11 +899,11 @@ "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_request_errors_total", + "expr": "sum(postmark_request_errors_total)", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index 4938485..707cc30 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -802,7 +802,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -878,7 +878,7 @@ }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -941,7 +941,7 @@ { "id": 32, "type": "stat", - "title": "Mail Sent (1d)", + "title": "Mail Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -954,7 +954,7 @@ }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_sending_limit_used_percent)", "refId": "A" } ], @@ -968,20 +968,28 @@ "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", - "value": 1 + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" }, - "decimals": 0 + "decimals": 1 }, "overrides": [] }, @@ -1022,7 +1030,7 @@ }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_api_up)", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-mail.yaml b/services/monitoring/grafana-dashboard-mail.yaml index 6c96358..f97ce60 100644 --- a/services/monitoring/grafana-dashboard-mail.yaml +++ b/services/monitoring/grafana-dashboard-mail.yaml @@ -29,7 +29,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -98,7 +98,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"7d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"7d\"})", "refId": "A" } ], @@ -167,7 +167,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -236,7 +236,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"7d\"}", + "expr": "max(postmark_outbound_bounced{window=\"7d\"})", "refId": "A" } ], @@ -305,7 +305,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_outbound_sent{window=\"1d\"})", "refId": "A" } ], @@ -366,7 +366,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"7d\"}", + "expr": "max(postmark_outbound_sent{window=\"7d\"})", "refId": "A" } ], @@ -414,7 +414,7 @@ data: { "id": 7, "type": "stat", - "title": "Postmark API Up", + "title": "Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -427,7 +427,137 @@ data: }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_sending_limit_used_percent)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 + } + ] + }, + "unit": "percent", + "custom": { + "displayMode": "auto" + }, + "decimals": 1 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 8, + "type": "stat", + "title": "Send Limit (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 4 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 9, + "type": "stat", + "title": "Postmark API Up", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_api_up)", "refId": "A" } ], @@ -473,7 +603,7 @@ data: } }, { - "id": 8, + "id": 10, "type": "stat", "title": "Last Success", "datasource": { @@ -483,12 +613,12 @@ data: "gridPos": { "h": 4, "w": 6, - "x": 18, - "y": 4 + "x": 6, + "y": 8 }, "targets": [ { - "expr": "postmark_last_success_timestamp_seconds", + "expr": "max(postmark_last_success_timestamp_seconds)", "refId": "A" } ], @@ -534,7 +664,129 @@ data: } }, { - "id": 9, + "id": 11, + "type": "stat", + "title": "Exporter Errors", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 8 + }, + "targets": [ + { + "expr": "sum(postmark_request_errors_total)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 12, + "type": "stat", + "title": "Limit Used (30d)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 8 + }, + "targets": [ + { + "expr": "max(postmark_sending_limit_used)", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(115, 115, 115, 1)", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + }, + "decimals": 0 + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 13, "type": "timeseries", "title": "Bounce Rate (1d vs 7d)", "datasource": { @@ -545,11 +797,11 @@ data: "h": 8, "w": 12, "x": 0, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounce_rate", + "expr": "max by (window) (postmark_outbound_bounce_rate)", "refId": "A", "legendFormat": "{{window}}" } @@ -571,7 +823,7 @@ data: } }, { - "id": 10, + "id": 14, "type": "timeseries", "title": "Bounced (1d vs 7d)", "datasource": { @@ -582,11 +834,11 @@ data: "h": 8, "w": 12, "x": 12, - "y": 8 + "y": 12 }, "targets": [ { - "expr": "postmark_outbound_bounced", + "expr": "max by (window) (postmark_outbound_bounced)", "refId": "A", "legendFormat": "{{window}}" } @@ -608,7 +860,7 @@ data: } }, { - "id": 11, + "id": 15, "type": "timeseries", "title": "Sent (1d vs 7d)", "datasource": { @@ -619,11 +871,11 @@ data: "h": 8, "w": 12, "x": 0, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_outbound_sent", + "expr": "max by (window) (postmark_outbound_sent)", "refId": "A", "legendFormat": "{{window}}" } @@ -645,7 +897,7 @@ data: } }, { - "id": 12, + "id": 16, "type": "timeseries", "title": "Exporter Errors", "datasource": { @@ -656,11 +908,11 @@ data: "h": 8, "w": 12, "x": 12, - "y": 16 + "y": 20 }, "targets": [ { - "expr": "postmark_request_errors_total", + "expr": "sum(postmark_request_errors_total)", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index 7ebb687..65ae053 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -811,7 +811,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounce_rate{window=\"1d\"}", + "expr": "max(postmark_outbound_bounce_rate{window=\"1d\"})", "refId": "A" } ], @@ -887,7 +887,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_bounced{window=\"1d\"}", + "expr": "max(postmark_outbound_bounced{window=\"1d\"})", "refId": "A" } ], @@ -950,7 +950,7 @@ data: { "id": 32, "type": "stat", - "title": "Mail Sent (1d)", + "title": "Mail Limit Used (30d)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -963,7 +963,7 @@ data: }, "targets": [ { - "expr": "postmark_outbound_sent{window=\"1d\"}", + "expr": "max(postmark_sending_limit_used_percent)", "refId": "A" } ], @@ -977,20 +977,28 @@ data: "mode": "absolute", "steps": [ { - "color": "rgba(115, 115, 115, 1)", + "color": "green", "value": null }, { - "color": "green", - "value": 1 + "color": "yellow", + "value": 70 + }, + { + "color": "orange", + "value": 85 + }, + { + "color": "red", + "value": 95 } ] }, - "unit": "none", + "unit": "percent", "custom": { "displayMode": "auto" }, - "decimals": 0 + "decimals": 1 }, "overrides": [] }, @@ -1031,7 +1039,7 @@ data: }, "targets": [ { - "expr": "postmark_api_up", + "expr": "max(postmark_api_up)", "refId": "A" } ], diff --git a/services/monitoring/postmark-exporter-deployment.yaml b/services/monitoring/postmark-exporter-deployment.yaml index adadc4b..eb2877e 100644 --- a/services/monitoring/postmark-exporter-deployment.yaml +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -39,6 +39,14 @@ spec: secretKeyRef: name: postmark-exporter key: relay-password + - name: POSTMARK_SENDING_LIMIT + valueFrom: + secretKeyRef: + name: postmark-exporter + key: sending-limit + optional: true + - name: POSTMARK_SENDING_LIMIT_WINDOW + value: "30d" - name: POLL_INTERVAL_SECONDS value: "60" - name: LISTEN_PORT diff --git a/services/monitoring/postmark-exporter-script.yaml b/services/monitoring/postmark-exporter-script.yaml index 884d963..afe2221 100644 --- a/services/monitoring/postmark-exporter-script.yaml +++ b/services/monitoring/postmark-exporter-script.yaml @@ -26,6 +26,7 @@ data: Window("today", 0), Window("1d", 1), Window("7d", 7), + Window("30d", 30), ] API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") @@ -35,6 +36,12 @@ data: PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() + LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip() + LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip() + try: + SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0 + except ValueError: + SENDING_LIMIT = 0.0 EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") EXPORTER_INFO.info( @@ -69,6 +76,18 @@ data: "Outbound bounce rate percentage within the selected window", labelnames=("window",), ) + POSTMARK_SENDING_LIMIT_GAUGE = Gauge( + "postmark_sending_limit", + "Configured Postmark sending limit for the active account", + ) + POSTMARK_SENDING_LIMIT_USED = Gauge( + "postmark_sending_limit_used", + "Messages sent within the configured send limit window", + ) + POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge( + "postmark_sending_limit_used_percent", + "Percent of the configured send limit used within the limit window", + ) def fetch_outbound_stats(token: str, window: Window) -> dict: @@ -90,15 +109,25 @@ data: def update_metrics(token: str) -> None: + sent_by_window = {} for window in WINDOWS: data = fetch_outbound_stats(token, window) sent = int(data.get("Sent", 0) or 0) bounced = int(data.get("Bounced", 0) or 0) rate = (bounced / sent * 100.0) if sent else 0.0 + sent_by_window[window.label] = sent POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT) + limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0) + POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent) + if SENDING_LIMIT: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0) + else: + POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0) + def main() -> None: if not PRIMARY_TOKEN and not FALLBACK_TOKEN: