titan-iac/services/monitoring/postmark-exporter-script.yaml

157 lines
5.3 KiB
YAML

# services/monitoring/postmark-exporter-script.yaml
# ConfigMap that stores the Postmark Prometheus exporter as a single Python
# script under the key `monitoring_postmark_exporter.py`.
apiVersion: v1
kind: ConfigMap
metadata:
name: postmark-exporter-script
data:
# Literal block scalar: everything that follows is the script's source text.
monitoring_postmark_exporter.py: |
#!/usr/bin/env python3
import datetime as dt
import os
import time
from dataclasses import dataclass

import requests
from prometheus_client import Counter, Gauge, Info, start_http_server
@dataclass(frozen=True)
class Window:
    """Named look-back period used when querying Postmark outbound stats.

    Instances are immutable (``frozen=True``) and compare by value.
    """

    # Value exported as the ``window`` label on the per-window gauges.
    label: str
    # Number of days subtracted from today's date to get the query start date.
    days: int
# Look-back periods polled on every refresh; each one becomes a ``window``
# label value on the per-window gauges.
WINDOWS = [
    Window(label, days)
    for label, days in (("today", 0), ("1d", 1), ("7d", 7), ("30d", 30))
]
# --- Runtime configuration, read once from the environment at import time ---

# Base URL of the Postmark API, with any trailing slash removed.
API_BASE = os.getenv("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
# Seconds to sleep between refresh cycles.
POLL_INTERVAL_SECONDS = int(os.getenv("POLL_INTERVAL_SECONDS", "60"))
# Address and port the metrics HTTP server binds to.
LISTEN_ADDRESS = os.getenv("LISTEN_ADDRESS", "0.0.0.0")
LISTEN_PORT = int(os.getenv("LISTEN_PORT", "8000"))
# Postmark server tokens; an empty string means "not configured".
PRIMARY_TOKEN = os.getenv("POSTMARK_SERVER_TOKEN", "").strip()
FALLBACK_TOKEN = os.getenv("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
# Label of the window whose sent count is compared against the sending limit.
LIMIT_WINDOW = os.getenv("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip()
LIMIT_RAW = os.getenv("POSTMARK_SENDING_LIMIT", "").strip()

# Parse the limit leniently: unset or unparseable values both yield 0.0.
SENDING_LIMIT = 0.0
if LIMIT_RAW:
    try:
        SENDING_LIMIT = float(LIMIT_RAW)
    except ValueError:
        SENDING_LIMIT = 0.0
# Static metadata about this exporter instance, published as an info metric:
# the API base URL in use and the comma-joined window labels being polled.
EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
EXPORTER_INFO.info(
    dict(
        api_base=API_BASE,
        windows=",".join(w.label for w in WINDOWS),
    )
)
# --- Exporter health metrics ---

# 1 after a successful refresh, 0 after a failed one.
POSTMARK_API_UP = Gauge(
    "postmark_api_up",
    "Whether Postmark API is reachable (1) or not (0)",
)
POSTMARK_LAST_SUCCESS = Gauge(
    "postmark_last_success_timestamp_seconds",
    "Unix timestamp of the last successful Postmark stats refresh",
)
# This value only ever increases (one ``inc()`` per failed refresh), so it is
# declared as a Counter, which is what the ``_total`` suffix denotes in
# Prometheus naming. The exposed sample name is unchanged.
POSTMARK_REQUEST_ERRORS = Counter(
    "postmark_request_errors_total",
    "Total Postmark stats request errors since exporter start",
)

# --- Per-window outbound statistics (labelled by ``window``) ---

POSTMARK_OUTBOUND_SENT = Gauge(
    "postmark_outbound_sent",
    "Outbound emails sent within the selected window",
    labelnames=("window",),
)
POSTMARK_OUTBOUND_BOUNCED = Gauge(
    "postmark_outbound_bounced",
    "Outbound emails bounced within the selected window",
    labelnames=("window",),
)
POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
    "postmark_outbound_bounce_rate",
    "Outbound bounce rate percentage within the selected window",
    labelnames=("window",),
)

# --- Sending-limit tracking (configured via POSTMARK_SENDING_LIMIT*) ---

POSTMARK_SENDING_LIMIT_GAUGE = Gauge(
    "postmark_sending_limit",
    "Configured Postmark sending limit for the active account",
)
POSTMARK_SENDING_LIMIT_USED = Gauge(
    "postmark_sending_limit_used",
    "Messages sent within the configured send limit window",
)
POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge(
    "postmark_sending_limit_used_percent",
    "Percent of the configured send limit used within the limit window",
)
def fetch_outbound_stats(token: str, window: Window) -> dict:
    """Fetch outbound statistics from Postmark for one look-back window.

    Args:
        token: Postmark server token, sent as ``X-Postmark-Server-Token``.
        window: Look-back window; the query runs from ``window.days`` days
            before today's date through today's date.

    Returns:
        The decoded JSON body of the ``/stats/outbound`` response.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    end_date = dt.date.today()
    start_date = end_date - dt.timedelta(days=window.days)

    url = f"{API_BASE}/stats/outbound"
    query = {
        "fromdate": start_date.isoformat(),
        "todate": end_date.isoformat(),
    }
    request_headers = {
        "Accept": "application/json",
        "X-Postmark-Server-Token": token,
    }

    # The timeout bounds a single request so a hung call cannot stall the
    # polling loop indefinitely.
    reply = requests.get(url, headers=request_headers, params=query, timeout=15)
    reply.raise_for_status()
    return reply.json()
def update_metrics(token: str) -> None:
    """Refresh every Postmark gauge from the API using *token*.

    All windows are fetched before any gauge is modified, so a failure
    part-way through leaves the previously published values intact instead
    of exposing a mix of freshly updated and stale windows.

    Args:
        token: Postmark server token passed to ``fetch_outbound_stats``.

    Raises:
        Exception: Anything raised by ``fetch_outbound_stats``, or by
            converting the returned ``Sent``/``Bounced`` values to ``int``.
            No gauge has been changed when this happens.
    """
    # Phase 1: collect (sent, bounced) for every window. Any exception aborts
    # here, before metrics are touched.
    counts = {}
    for window in WINDOWS:
        data = fetch_outbound_stats(token, window)
        # ``or 0`` maps an explicit null/empty value to zero, not just a
        # missing key.
        counts[window.label] = (
            int(data.get("Sent", 0) or 0),
            int(data.get("Bounced", 0) or 0),
        )

    # Phase 2: publish the per-window gauges.
    for label, (sent, bounced) in counts.items():
        # Guard against division by zero when nothing was sent.
        rate = (bounced / sent * 100.0) if sent else 0.0
        POSTMARK_OUTBOUND_SENT.labels(window=label).set(sent)
        POSTMARK_OUTBOUND_BOUNCED.labels(window=label).set(bounced)
        POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=label).set(rate)

    # Sending-limit gauges. If LIMIT_WINDOW matches no polled window the
    # usage is reported as 0.
    POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT)
    limit_window_sent = counts.get(LIMIT_WINDOW, (0, 0))[0]
    POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent)
    if SENDING_LIMIT:
        POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0)
    else:
        # A limit of 0 (unset or unparseable) reports 0% used.
        POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0)
def main() -> None:
    """Start the metrics HTTP server and refresh Postmark stats forever.

    The exporter keeps using the current token for as long as refreshes
    succeed and moves to the next configured token only after a failed
    refresh, so the fallback token is used only when the current one stops
    working rather than on every other poll.

    Raises:
        SystemExit: If neither POSTMARK_SERVER_TOKEN nor
            POSTMARK_SERVER_TOKEN_FALLBACK is set.
    """
    # Empty strings mean "not configured"; keep only usable tokens, primary
    # first.
    tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
    if not tokens:
        raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")

    start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)

    token_index = 0
    while True:
        token = tokens[token_index]
        try:
            update_metrics(token)
        except Exception as exc:  # noqa: BLE001
            # Top-level boundary: record the failure and keep the exporter
            # alive so the health metrics stay scrapeable.
            POSTMARK_API_UP.set(0)
            POSTMARK_REQUEST_ERRORS.inc()
            print(f"postmark_exporter: refresh failed: {exc}", flush=True)
            # Rotate only on failure; the next poll tries the other token.
            token_index = (token_index + 1) % len(tokens)
        else:
            POSTMARK_API_UP.set(1)
            POSTMARK_LAST_SUCCESS.set(time.time())
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()