#!/usr/bin/env python3
"""Prometheus exporter for Postmark outbound email statistics.

This part of the module reads its configuration from environment variables
and creates the metric objects that the refresh loop updates.
"""

import datetime as dt
import os
import time
from dataclasses import dataclass

import requests
from prometheus_client import Counter, Gauge, Info, start_http_server


@dataclass(frozen=True)
class Window:
    """A reporting window for which outbound statistics are exported."""

    # Value of the ``window`` label attached to the per-window gauges.
    label: str
    # Number of days subtracted from today's date to get the window start.
    days: int


# Windows exported on every refresh; each one costs one API request per poll.
WINDOWS = [
    Window("1d", 1),
    Window("7d", 7),
]

# Base URL of the Postmark API, without a trailing slash so paths can be
# appended with a single "/".
API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
# Pause between refresh cycles, in seconds.
POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60"))
# Address and port the /metrics HTTP server binds to.
LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000"))

# Server tokens; surrounding whitespace is stripped so that a trailing newline
# in the configured value does not end up in the request header.
PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip()
FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()

EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
EXPORTER_INFO.info(
    {
        "api_base": API_BASE,
        "windows": ",".join(window.label for window in WINDOWS),
    }
)

POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
POSTMARK_LAST_SUCCESS = Gauge(
    "postmark_last_success_timestamp_seconds",
    "Unix timestamp of the last successful Postmark stats refresh",
)
# A monotonically increasing error total is a counter, not a gauge: the
# Counter type lets rate()/increase() treat exporter restarts as resets.
# The exposed series keeps the name ``postmark_request_errors_total``.
POSTMARK_REQUEST_ERRORS = Counter(
    "postmark_request_errors_total",
    "Total Postmark stats request errors since exporter start",
)

POSTMARK_OUTBOUND_SENT = Gauge(
    "postmark_outbound_sent",
    "Outbound emails sent within the selected window",
    labelnames=("window",),
)
POSTMARK_OUTBOUND_BOUNCED = Gauge(
    "postmark_outbound_bounced",
    "Outbound emails bounced within the selected window",
    labelnames=("window",),
)
# Stored as a percentage (0-100), not as a 0-1 ratio.
POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
    "postmark_outbound_bounce_rate",
    "Outbound bounce rate percentage within the selected window",
    labelnames=("window",),
)
def fetch_outbound_stats(token: str, window: Window) -> dict:
    """Fetch the outbound statistics overview for one window.

    Args:
        token: Postmark server token sent in ``X-Postmark-Server-Token``.
        window: Window whose ``days`` value determines the start date.

    Returns:
        The decoded JSON body of the ``/stats/outbound`` response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: Presumably also on connection problems or
            when the 15 second timeout expires; see the requests docs.
    """
    # NOTE(review): date.today() follows the process's local time zone, and
    # both endpoints are sent; if Postmark treats them as inclusive, the
    # window spans days + 1 calendar days. Confirm against the Stats API.
    today = dt.date.today()
    fromdate = today - dt.timedelta(days=window.days)
    params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()}
    headers = {
        "Accept": "application/json",
        "X-Postmark-Server-Token": token,
    }
    response = requests.get(
        f"{API_BASE}/stats/outbound",
        headers=headers,
        params=params,
        timeout=15,
    )
    response.raise_for_status()
    return response.json()


def update_metrics(token: str) -> None:
    """Refresh the per-window gauges using ``token``.

    Every window is fetched and parsed before any gauge is written, so a
    failure part-way through a cycle leaves the previously published values
    intact instead of a mix of fresh and stale windows.

    Raises:
        Exception: Whatever ``fetch_outbound_stats`` or the parsing raises.
    """
    readings = []
    for window in WINDOWS:
        data = fetch_outbound_stats(token, window)
        # "or 0" maps an explicit null in the payload to zero.
        sent = int(data.get("Sent", 0) or 0)
        bounced = int(data.get("Bounced", 0) or 0)
        # Percentage; defined as 0 when nothing was sent to avoid dividing by 0.
        rate = (bounced / sent * 100.0) if sent else 0.0
        readings.append((window.label, sent, bounced, rate))

    for label, sent, bounced, rate in readings:
        POSTMARK_OUTBOUND_SENT.labels(window=label).set(sent)
        POSTMARK_OUTBOUND_BOUNCED.labels(window=label).set(bounced)
        POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=label).set(rate)


def _refresh_with_fallback(tokens: list[str]) -> bool:
    """Try each token in priority order; return True once a refresh succeeds."""
    total = len(tokens)
    for position, token in enumerate(tokens, start=1):
        try:
            update_metrics(token)
        except Exception as exc:  # noqa: BLE001 - the poll loop must keep running
            POSTMARK_REQUEST_ERRORS.inc()
            # Only the token's position is logged, never the token itself.
            print(
                f"postmark_exporter: refresh failed: {exc} [token {position}/{total}]",
                flush=True,
            )
        else:
            return True
    return False


def main() -> None:
    """Serve /metrics and refresh the Postmark statistics forever.

    Raises:
        SystemExit: If neither server token is configured.
    """
    # Keep priority order (primary first) and drop duplicates, so identical
    # primary/fallback values do not double the requests during an outage.
    tokens = list(dict.fromkeys(token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token))
    if not tokens:
        raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")

    start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)

    while True:
        # The fallback token is only used when the primary fails in the same
        # cycle, so one bad token cannot make postmark_api_up flap.
        if _refresh_with_fallback(tokens):
            POSTMARK_API_UP.set(1)
            POSTMARK_LAST_SUCCESS.set(time.time())
        else:
            POSTMARK_API_UP.set(0)
        time.sleep(POLL_INTERVAL_SECONDS)


if __name__ == "__main__":
    main()
def indent(text: str, spaces: int) -> str:
    """Return *text* with every non-empty line shifted right by *spaces*.

    Line endings are preserved. A line that contains nothing but its newline
    is passed through unchanged, so blank lines gain no trailing whitespace.
    """
    pad = " " * spaces
    shifted = []
    for line in text.splitlines(keepends=True):
        if line.strip("\n"):
            shifted.append(pad + line)
        else:
            shifted.append(line)
    return "".join(shifted)


def main() -> None:
    """Render the exporter script into its ConfigMap manifest.

    Reads ``scripts/monitoring_postmark_exporter.py`` relative to the
    repository root and writes
    ``services/monitoring/postmark-exporter-script.yaml`` with the script
    embedded as a literal block scalar.
    """
    repo_root = Path(__file__).resolve().parents[1]
    script_path = repo_root / "scripts" / "monitoring_postmark_exporter.py"
    manifest_path = repo_root / "services" / "monitoring" / "postmark-exporter-script.yaml"

    script_text = script_path.read_text(encoding="utf-8")
    # The embedded script must end with a newline.
    if not script_text.endswith("\n"):
        script_text += "\n"

    header = "".join(
        [
            "# services/monitoring/postmark-exporter-script.yaml\n",
            "apiVersion: v1\n",
            "kind: ConfigMap\n",
            "metadata:\n",
            "  name: postmark-exporter-script\n",
            "data:\n",
            "  monitoring_postmark_exporter.py: |\n",
        ]
    )
    # The script sits four spaces deep, two beyond its two-space key.
    manifest_path.write_text(header + indent(script_text, 4), encoding="utf-8")


if __name__ == "__main__":
    main()
prometheus.io/scrape: "true" + prometheus.io/port: "8000" + prometheus.io/path: "/metrics" + spec: + containers: + - name: exporter + image: python:3.12-alpine + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + pip install --no-cache-dir prometheus-client==0.22.1 requests==2.32.3 + exec python /app/monitoring_postmark_exporter.py + env: + - name: POSTMARK_SERVER_TOKEN + valueFrom: + secretKeyRef: + name: postmark-exporter + key: relay-username + - name: POSTMARK_SERVER_TOKEN_FALLBACK + valueFrom: + secretKeyRef: + name: postmark-exporter + key: relay-password + - name: POLL_INTERVAL_SECONDS + value: "60" + - name: LISTEN_PORT + value: "8000" + ports: + - name: http + containerPort: 8000 + volumeMounts: + - name: script + mountPath: /app + readOnly: true + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi + volumes: + - name: script + configMap: + name: postmark-exporter-script + diff --git a/services/monitoring/postmark-exporter-script.yaml b/services/monitoring/postmark-exporter-script.yaml new file mode 100644 index 0000000..3d753fa --- /dev/null +++ b/services/monitoring/postmark-exporter-script.yaml @@ -0,0 +1,127 @@ +# services/monitoring/postmark-exporter-script.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: postmark-exporter-script +data: + monitoring_postmark_exporter.py: | + #!/usr/bin/env python3 + + import datetime as dt + import os + import time + from dataclasses import dataclass + + import requests + from prometheus_client import Gauge, Info, start_http_server + + + @dataclass(frozen=True) + class Window: + label: str + days: int + + + WINDOWS = [ + Window("1d", 1), + Window("7d", 7), + ] + + API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/") + POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60")) + LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0") + LISTEN_PORT = 
int(os.environ.get("LISTEN_PORT", "8000")) + + PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip() + FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip() + + EXPORTER_INFO = Info("postmark_exporter", "Exporter build info") + EXPORTER_INFO.info( + { + "api_base": API_BASE, + "windows": ",".join(window.label for window in WINDOWS), + } + ) + + POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)") + POSTMARK_LAST_SUCCESS = Gauge( + "postmark_last_success_timestamp_seconds", + "Unix timestamp of the last successful Postmark stats refresh", + ) + POSTMARK_REQUEST_ERRORS = Gauge( + "postmark_request_errors_total", + "Total Postmark stats request errors since exporter start", + ) + + POSTMARK_OUTBOUND_SENT = Gauge( + "postmark_outbound_sent", + "Outbound emails sent within the selected window", + labelnames=("window",), + ) + POSTMARK_OUTBOUND_BOUNCED = Gauge( + "postmark_outbound_bounced", + "Outbound emails bounced within the selected window", + labelnames=("window",), + ) + POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge( + "postmark_outbound_bounce_rate", + "Outbound bounce rate percentage within the selected window", + labelnames=("window",), + ) + + + def fetch_outbound_stats(token: str, window: Window) -> dict: + today = dt.date.today() + fromdate = today - dt.timedelta(days=window.days) + params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()} + headers = { + "Accept": "application/json", + "X-Postmark-Server-Token": token, + } + response = requests.get( + f"{API_BASE}/stats/outbound", + headers=headers, + params=params, + timeout=15, + ) + response.raise_for_status() + return response.json() + + + def update_metrics(token: str) -> None: + for window in WINDOWS: + data = fetch_outbound_stats(token, window) + sent = int(data.get("Sent", 0) or 0) + bounced = int(data.get("Bounced", 0) or 0) + rate = (bounced / sent * 100.0) if sent else 0.0 + 
POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent) + POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced) + POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate) + + + def main() -> None: + if not PRIMARY_TOKEN and not FALLBACK_TOKEN: + raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required") + + start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS) + + tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token] + token_index = 0 + + while True: + token = tokens[token_index % len(tokens)] + token_index += 1 + try: + update_metrics(token) + POSTMARK_API_UP.set(1) + POSTMARK_LAST_SUCCESS.set(time.time()) + except Exception as exc: # noqa: BLE001 + POSTMARK_API_UP.set(0) + POSTMARK_REQUEST_ERRORS.inc() + print(f"postmark_exporter: refresh failed: {exc}", flush=True) + time.sleep(POLL_INTERVAL_SECONDS) + + + if __name__ == "__main__": + main() + diff --git a/services/monitoring/postmark-exporter-service.yaml b/services/monitoring/postmark-exporter-service.yaml new file mode 100644 index 0000000..957973a --- /dev/null +++ b/services/monitoring/postmark-exporter-service.yaml @@ -0,0 +1,18 @@ +# services/monitoring/postmark-exporter-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: postmark-exporter + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8000" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app: postmark-exporter + ports: + - name: http + port: 8000 + targetPort: http +