monitoring: add Postmark bounce exporter

This commit is contained in:
Brad Stein 2026-01-05 21:44:29 -03:00
parent ec208fe0f6
commit d132917d9e
6 changed files with 366 additions and 0 deletions

View File

@ -0,0 +1,120 @@
#!/usr/bin/env python3
import datetime as dt
import os
import time
from dataclasses import dataclass

import requests
from prometheus_client import Counter, Gauge, Info, start_http_server
@dataclass(frozen=True)
class Window:
    """Immutable description of one reporting window.

    Attributes:
        label: Value used for the ``window`` label on the per-window gauges.
        days: Number of days before today at which the stats query starts.
    """

    label: str
    days: int
# Reporting windows polled on every refresh; each entry becomes one value of
# the ``window`` label on the per-window gauges.
WINDOWS = [
    Window(label="1d", days=1),
    Window(label="7d", days=7),
]
# --- Runtime configuration, read once from the environment at import time ---

# Postmark endpoint; a trailing slash is dropped so paths can be appended.
API_BASE = os.getenv("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
# Seconds to sleep between refresh attempts.
POLL_INTERVAL_SECONDS = int(os.getenv("POLL_INTERVAL_SECONDS", "60"))

# Where the /metrics HTTP server binds.
LISTEN_ADDRESS = os.getenv("LISTEN_ADDRESS", "0.0.0.0")
LISTEN_PORT = int(os.getenv("LISTEN_PORT", "8000"))

# Server tokens; surrounding whitespace is stripped and an unset variable
# becomes the empty string.
PRIMARY_TOKEN = os.getenv("POSTMARK_SERVER_TOKEN", "").strip()
FALLBACK_TOKEN = os.getenv("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
# Static metadata about this exporter instance: the API endpoint in use and
# the comma-separated labels of the configured windows.
EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
EXPORTER_INFO.info(
    dict(
        api_base=API_BASE,
        windows=",".join(w.label for w in WINDOWS),
    )
)
# --- Exporter health metrics (updated by main()) ---

# 1 after a refresh that succeeded, 0 after one that failed.
POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
POSTMARK_LAST_SUCCESS = Gauge(
    "postmark_last_success_timestamp_seconds",
    "Unix timestamp of the last successful Postmark stats refresh",
)
# Only ever incremented, and named with the counter-only ``_total`` suffix, so
# it is declared as a Counter. The exposed sample name is unchanged; the
# metric is now typed ``counter`` so rate()/increase() are appropriate.
POSTMARK_REQUEST_ERRORS = Counter(
    "postmark_request_errors_total",
    "Total Postmark stats request errors since exporter start",
)

# --- Per-window outbound statistics (updated by update_metrics()) ---

POSTMARK_OUTBOUND_SENT = Gauge(
    "postmark_outbound_sent",
    "Outbound emails sent within the selected window",
    labelnames=("window",),
)
POSTMARK_OUTBOUND_BOUNCED = Gauge(
    "postmark_outbound_bounced",
    "Outbound emails bounced within the selected window",
    labelnames=("window",),
)
# Expressed as a percentage (0-100), not a 0-1 ratio.
POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
    "postmark_outbound_bounce_rate",
    "Outbound bounce rate percentage within the selected window",
    labelnames=("window",),
)
def fetch_outbound_stats(token: str, window: Window) -> dict:
    """Fetch Postmark's outbound statistics for one reporting window.

    Args:
        token: Server token sent in the ``X-Postmark-Server-Token`` header.
        window: Reporting window; ``window.days`` sets how far before today
            the ``fromdate`` query parameter lies.

    Returns:
        The decoded JSON body of ``GET {API_BASE}/stats/outbound``.

    Raises:
        requests.HTTPError: If the response carries an error status.
        requests.RequestException: On transport failures, including the
            15-second timeout.
    """
    # NOTE(review): date.today() uses the process's local timezone, and both
    # dates are sent as-is; confirm how the API treats the two bounds.
    end = dt.date.today()
    start = end - dt.timedelta(days=window.days)
    response = requests.get(
        f"{API_BASE}/stats/outbound",
        headers={
            "Accept": "application/json",
            "X-Postmark-Server-Token": token,
        },
        params={"fromdate": start.isoformat(), "todate": end.isoformat()},
        timeout=15,
    )
    response.raise_for_status()
    return response.json()
def update_metrics(token: str) -> None:
    """Refresh the sent, bounced and bounce-rate gauges for every window.

    Windows are processed in ``WINDOWS`` order. An exception raised while
    fetching one window propagates to the caller, leaving the gauges of any
    later windows at their previous values.

    Args:
        token: Postmark server token passed through to the stats request.
    """
    for window in WINDOWS:
        stats = fetch_outbound_stats(token, window)
        # ``or 0`` also covers keys that are present but null or empty.
        sent = int(stats.get("Sent", 0) or 0)
        bounced = int(stats.get("Bounced", 0) or 0)
        if sent:
            rate = bounced / sent * 100.0
        else:
            rate = 0.0  # nothing sent: report 0% rather than divide by zero
        for gauge, value in (
            (POSTMARK_OUTBOUND_SENT, sent),
            (POSTMARK_OUTBOUND_BOUNCED, bounced),
            (POSTMARK_OUTBOUND_BOUNCE_RATE, rate),
        ):
            gauge.labels(window=window.label).set(value)
def main() -> None:
    """Serve metrics over HTTP and refresh them from Postmark forever.

    The configured tokens are tried in order: the primary token is used until
    a refresh fails, at which point the next configured token is tried on the
    following poll. A token that works keeps being used, so a bad fallback
    token cannot make ``postmark_api_up`` flap while the primary is healthy.

    Raises:
        SystemExit: If neither POSTMARK_SERVER_TOKEN nor
            POSTMARK_SERVER_TOKEN_FALLBACK is set to a non-empty value.
    """
    tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
    if not tokens:
        raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")
    start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)
    token_index = 0
    while True:
        try:
            update_metrics(tokens[token_index])
        # Top-level boundary: any failure is reported via metrics and the log
        # so the polling loop keeps running.
        except Exception as exc:  # noqa: BLE001
            POSTMARK_API_UP.set(0)
            POSTMARK_REQUEST_ERRORS.inc()
            print(f"postmark_exporter: refresh failed: {exc}", flush=True)
            # Rotate only after a failure; with a single token this is a no-op.
            token_index = (token_index + 1) % len(tokens)
        else:
            POSTMARK_API_UP.set(1)
            POSTMARK_LAST_SUCCESS.set(time.time())
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,35 @@
#!/usr/bin/env python3
from pathlib import Path
def indent(text: str, spaces: int) -> str:
    """Prefix each line of *text* with *spaces* space characters.

    Lines that consist only of newline characters are passed through
    unchanged, so blank lines do not gain trailing whitespace. Line endings
    are preserved.
    """
    pad = " " * spaces
    pieces = []
    for line in text.splitlines(keepends=True):
        if line.strip("\n"):
            pieces.append(pad + line)
        else:
            pieces.append(line)
    return "".join(pieces)
def main() -> None:
    """Render the exporter script into its ConfigMap manifest.

    Reads ``scripts/monitoring_postmark_exporter.py`` (relative to the
    repository root, taken to be the parent of this file's directory), embeds
    it as a literal block scalar indented by four spaces, and overwrites
    ``services/monitoring/postmark-exporter-script.yaml``.

    The manifest starts with a comment naming the source script, so readers
    of the YAML know it is generated and where to make changes.
    """
    root = Path(__file__).resolve().parents[1]
    source = root / "scripts" / "monitoring_postmark_exporter.py"
    target = root / "services" / "monitoring" / "postmark-exporter-script.yaml"

    payload = source.read_text(encoding="utf-8")
    # Guarantee the block scalar ends with a newline.
    if not payload.endswith("\n"):
        payload += "\n"

    # The header path and the data key are derived from the Path objects so
    # they cannot drift from the files actually read and written.
    manifest = (
        f"# {target.relative_to(root).as_posix()}\n"
        f"# Generated from {source.relative_to(root).as_posix()}; "
        "edit that script and regenerate instead of editing this file.\n"
        "apiVersion: v1\n"
        "kind: ConfigMap\n"
        "metadata:\n"
        "  name: postmark-exporter-script\n"
        "data:\n"
        f"  {source.name}: |\n"
        f"{indent(payload, 4)}"
    )
    target.write_text(manifest, encoding="utf-8")


if __name__ == "__main__":
    main()

View File

@ -5,6 +5,7 @@ namespace: monitoring
resources: resources:
- namespace.yaml - namespace.yaml
- rbac.yaml - rbac.yaml
- postmark-exporter-script.yaml
- grafana-dashboard-overview.yaml - grafana-dashboard-overview.yaml
- grafana-dashboard-pods.yaml - grafana-dashboard-pods.yaml
- grafana-dashboard-nodes.yaml - grafana-dashboard-nodes.yaml
@ -12,6 +13,8 @@ resources:
- grafana-dashboard-network.yaml - grafana-dashboard-network.yaml
- grafana-dashboard-gpu.yaml - grafana-dashboard-gpu.yaml
- dcgm-exporter.yaml - dcgm-exporter.yaml
- postmark-exporter-service.yaml
- postmark-exporter-deployment.yaml
- grafana-folders.yaml - grafana-folders.yaml
- helmrelease.yaml - helmrelease.yaml
- grafana-org-bootstrap.yaml - grafana-org-bootstrap.yaml

View File

@ -0,0 +1,63 @@
# services/monitoring/postmark-exporter-deployment.yaml
# Runs the Postmark bounce exporter: a stock Python image that installs its
# two dependencies at start-up and then executes the script mounted from the
# postmark-exporter-script ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: postmark-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: postmark-exporter
  template:
    metadata:
      labels:
        app: postmark-exporter
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8000"
        prometheus.io/path: "/metrics"
    spec:
      containers:
        - name: exporter
          image: python:3.12-alpine
          imagePullPolicy: IfNotPresent
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail
              pip install --no-cache-dir prometheus-client==0.22.1 requests==2.32.3
              exec python /app/monitoring_postmark_exporter.py
          env:
            # NOTE(review): presumably the relay credentials in this Secret
            # double as Postmark server tokens — confirm.
            - name: POSTMARK_SERVER_TOKEN
              valueFrom:
                secretKeyRef:
                  name: postmark-exporter
                  key: relay-username
            # The script runs with either token alone, so a missing fallback
            # key must not prevent the container from starting.
            - name: POSTMARK_SERVER_TOKEN_FALLBACK
              valueFrom:
                secretKeyRef:
                  name: postmark-exporter
                  key: relay-password
                  optional: true
            - name: POLL_INTERVAL_SECONDS
              value: "60"
            - name: LISTEN_PORT
              value: "8000"
          ports:
            - name: http
              containerPort: 8000
          # The metrics server only starts after the pip install above has
          # finished; keep the pod NotReady until /metrics answers.
          readinessProbe:
            httpGet:
              path: /metrics
              port: http
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: script
              mountPath: /app
              readOnly: true
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 250m
              memory: 256Mi
      volumes:
        - name: script
          configMap:
            name: postmark-exporter-script

View File

@ -0,0 +1,127 @@
# services/monitoring/postmark-exporter-script.yaml
# Generated from scripts/monitoring_postmark_exporter.py; edit that script and regenerate instead of editing this file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: postmark-exporter-script
# The key below is the file name the postmark-exporter Deployment runs from
# its /app mount of this ConfigMap.
data:
  monitoring_postmark_exporter.py: |
    #!/usr/bin/env python3
    import datetime as dt
    import os
    import time
    from dataclasses import dataclass
    import requests
    from prometheus_client import Gauge, Info, start_http_server
    @dataclass(frozen=True)
    class Window:
        label: str
        days: int
    WINDOWS = [
        Window("1d", 1),
        Window("7d", 7),
    ]
    API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
    POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60"))
    LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
    LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000"))
    PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip()
    FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
    EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
    EXPORTER_INFO.info(
        {
            "api_base": API_BASE,
            "windows": ",".join(window.label for window in WINDOWS),
        }
    )
    POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
    POSTMARK_LAST_SUCCESS = Gauge(
        "postmark_last_success_timestamp_seconds",
        "Unix timestamp of the last successful Postmark stats refresh",
    )
    POSTMARK_REQUEST_ERRORS = Gauge(
        "postmark_request_errors_total",
        "Total Postmark stats request errors since exporter start",
    )
    POSTMARK_OUTBOUND_SENT = Gauge(
        "postmark_outbound_sent",
        "Outbound emails sent within the selected window",
        labelnames=("window",),
    )
    POSTMARK_OUTBOUND_BOUNCED = Gauge(
        "postmark_outbound_bounced",
        "Outbound emails bounced within the selected window",
        labelnames=("window",),
    )
    POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
        "postmark_outbound_bounce_rate",
        "Outbound bounce rate percentage within the selected window",
        labelnames=("window",),
    )
    def fetch_outbound_stats(token: str, window: Window) -> dict:
        today = dt.date.today()
        fromdate = today - dt.timedelta(days=window.days)
        params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()}
        headers = {
            "Accept": "application/json",
            "X-Postmark-Server-Token": token,
        }
        response = requests.get(
            f"{API_BASE}/stats/outbound",
            headers=headers,
            params=params,
            timeout=15,
        )
        response.raise_for_status()
        return response.json()
    def update_metrics(token: str) -> None:
        for window in WINDOWS:
            data = fetch_outbound_stats(token, window)
            sent = int(data.get("Sent", 0) or 0)
            bounced = int(data.get("Bounced", 0) or 0)
            rate = (bounced / sent * 100.0) if sent else 0.0
            POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent)
            POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced)
            POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate)
    def main() -> None:
        if not PRIMARY_TOKEN and not FALLBACK_TOKEN:
            raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")
        start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)
        tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
        token_index = 0
        while True:
            token = tokens[token_index % len(tokens)]
            token_index += 1
            try:
                update_metrics(token)
                POSTMARK_API_UP.set(1)
                POSTMARK_LAST_SUCCESS.set(time.time())
            except Exception as exc: # noqa: BLE001
                POSTMARK_API_UP.set(0)
                POSTMARK_REQUEST_ERRORS.inc()
                print(f"postmark_exporter: refresh failed: {exc}", flush=True)
            time.sleep(POLL_INTERVAL_SECONDS)
    if __name__ == "__main__":
        main()

View File

@ -0,0 +1,18 @@
# services/monitoring/postmark-exporter-service.yaml
# ClusterIP Service exposing the metrics port of the pods labelled
# app=postmark-exporter.
apiVersion: v1
kind: Service
metadata:
  name: postmark-exporter
  annotations:
    # NOTE(review): the Deployment's pod template carries the same three
    # annotations; depending on how Prometheus discovery is configured this
    # could yield two scrape targets for one exporter — confirm.
    prometheus.io/scrape: "true"
    prometheus.io/port: "8000"
    prometheus.io/path: "/metrics"
spec:
  type: ClusterIP
  selector:
    app: postmark-exporter
  ports:
    - name: http
      port: 8000
      # Refers to the container port named "http" (8000) in the Deployment.
      targetPort: http