From fb6ddce0c72105cc5812e80b5fd5546fd042ad7a Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Wed, 21 Jan 2026 02:57:40 -0300 Subject: [PATCH] glue: centralize sync tasks in ariadne --- .../cert-manager/letsencrypt-prod.yaml | 2 +- .../sources/cert-manager/letsencrypt.yaml | 2 +- scripts/dashboards_render_atlas.py | 35 +++++- services/finance/firefly-cronjob.yaml | 1 + services/keycloak/deployment.yaml | 2 +- services/keycloak/realm-settings-job.yaml | 73 +++++++++++++ .../logging/opensearch-prune-cronjob.yaml | 1 + services/mailu/kustomization.yaml | 5 - services/maintenance/ariadne-deployment.yaml | 12 +- .../maintenance/image-sweeper-cronjob.yaml | 1 + services/maintenance/pod-cleaner-cronjob.yaml | 1 + .../monitoring/dashboards/atlas-testing.json | 103 ++++++++++++++++-- .../monitoring/grafana-dashboard-testing.yaml | 103 ++++++++++++++++-- services/nextcloud/cronjob.yaml | 1 + services/nextcloud/maintenance-cronjob.yaml | 1 + 15 files changed, 313 insertions(+), 30 deletions(-) diff --git a/infrastructure/sources/cert-manager/letsencrypt-prod.yaml b/infrastructure/sources/cert-manager/letsencrypt-prod.yaml index 7f90f01..5795b09 100644 --- a/infrastructure/sources/cert-manager/letsencrypt-prod.yaml +++ b/infrastructure/sources/cert-manager/letsencrypt-prod.yaml @@ -5,7 +5,7 @@ metadata: name: letsencrypt-prod spec: acme: - email: brad.stein@gmail.com + email: brad@bstein.dev server: https://acme-v02.api.letsencrypt.org/directory privateKeySecretRef: name: letsencrypt-prod-account-key diff --git a/infrastructure/sources/cert-manager/letsencrypt.yaml b/infrastructure/sources/cert-manager/letsencrypt.yaml index a988312..5fbe4e3 100644 --- a/infrastructure/sources/cert-manager/letsencrypt.yaml +++ b/infrastructure/sources/cert-manager/letsencrypt.yaml @@ -5,7 +5,7 @@ metadata: name: letsencrypt spec: acme: - email: brad.stein@gmail.com + email: brad@bstein.dev server: https://acme-v02.api.letsencrypt.org/directory privateKeySecretRef: name: letsencrypt-account-key diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index a3fb372..509cf49 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -338,7 +338,9 @@ GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})" GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})" ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))' ARIADNE_TASK_SUCCESS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="ok"}[24h]))' +ARIADNE_TASK_RUNS_BY_STATUS_1H = 'sum by (status) (increase(ariadne_task_runs_total[1h]))' ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600" +ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600" ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total" ARIADNE_CI_COVERAGE = 'ariadne_ci_coverage_percent{repo="ariadne"}' ARIADNE_CI_TESTS = 'ariadne_ci_tests_total{repo="ariadne"}' @@ -2236,12 +2238,24 @@ def build_testing_dashboard(): instant=True, ) ) + panels.append( + timeseries_panel( + 12, + "Ariadne Task Runs vs Errors (1h)", + ARIADNE_TASK_RUNS_BY_STATUS_1H, + {"h": 6, "w": 24, "x": 0, "y": 12}, + unit="none", + legend="{{status}}", + legend_display="table", + legend_placement="right", + ) + ) panels.append( table_panel( 7, "Ariadne Task Errors (24h)", ARIADNE_TASK_ERRORS_24H, - {"h": 6, "w": 12, "x": 0, "y": 12}, + {"h": 6, "w": 12, "x": 0, "y": 18}, unit="none", transformations=sort_desc, instant=True, @@ -2252,7 +2266,7 @@ def build_testing_dashboard(): 8, "Ariadne Schedule Last Success (hours ago)", ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS, - {"h": 6, "w": 12, "x": 12, "y": 12}, + {"h": 6, "w": 12, "x": 12, "y": 18}, unit="h", transformations=sort_desc, instant=True, @@ -2263,18 +2277,29 @@ def build_testing_dashboard(): 9, "Ariadne Access Requests", ARIADNE_ACCESS_REQUESTS, - {"h": 4, "w": 24, "x": 0, "y": 18}, + {"h": 6, "w": 12, "x": 12, "y": 24}, unit="none", transformations=sort_desc, instant=True, ) ) + panels.append( + table_panel( + 13, + "Ariadne Schedule Last Error (hours ago)", + ARIADNE_SCHEDULE_LAST_ERROR_HOURS, + {"h": 6, "w": 12, "x": 0, "y": 24}, + unit="h", + transformations=sort_desc, + instant=True, + ) + ) panels.append( stat_panel( 10, "Ariadne CI Coverage (%)", ARIADNE_CI_COVERAGE, - {"h": 4, "w": 6, "x": 0, "y": 22}, + {"h": 4, "w": 6, "x": 0, "y": 30}, unit="percent", decimals=1, instant=True, @@ -2286,7 +2311,7 @@ def build_testing_dashboard(): 11, "Ariadne CI Tests (latest)", ARIADNE_CI_TESTS, - {"h": 6, "w": 18, "x": 6, "y": 22}, + {"h": 6, "w": 18, "x": 6, "y": 30}, unit="none", transformations=sort_desc, instant=True, diff --git a/services/finance/firefly-cronjob.yaml b/services/finance/firefly-cronjob.yaml index 6c4d507..9e5c852 100644 --- a/services/finance/firefly-cronjob.yaml +++ b/services/finance/firefly-cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: finance spec: schedule: "0 3 * * *" + suspend: true concurrencyPolicy: Forbid successfulJobsHistoryLimit: 1 failedJobsHistoryLimit: 3 diff --git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 3d241c9..131169d 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -126,7 +126,7 @@ spec: - name: KC_EVENTS_LISTENERS value: jboss-logging,mailu-http - name: KC_SPI_EVENTS_LISTENER_MAILU-HTTP_ENDPOINT - value: http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events + value: http://ariadne.maintenance.svc.cluster.local/events ports: - containerPort: 8080 name: http diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index fdee377..786948b 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -469,6 +469,79 @@ spec: if status not in (201, 204): raise SystemExit(f"Unexpected protocol mapper create response: {status}") + # Ensure mailu_email overrides email claim for service clients. + excluded_email_clients = { + "account", + "account-console", + "admin-cli", + "security-admin-console", + "realm-management", + "broker", + } + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients", + access_token, + ) + if status == 200 and isinstance(clients, list): + for client in clients: + if not isinstance(client, dict): + continue + if client.get("protocol") != "openid-connect": + continue + client_name = client.get("clientId") if isinstance(client.get("clientId"), str) else "" + if not client_name or client_name in excluded_email_clients: + continue + client_id = client.get("id") + if not client_id: + continue + email_mapper = { + "name": "mailu-email", + "protocol": "openid-connect", + "protocolMapper": "oidc-usermodel-attribute-mapper", + "consentRequired": False, + "config": { + "user.attribute": "mailu_email", + "claim.name": "email", + "jsonType.label": "String", + "id.token.claim": "true", + "access.token.claim": "true", + "userinfo.token.claim": "true", + "multivalued": "false", + "aggregate.attrs": "false", + }, + } + status, mappers = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models", + access_token, + ) + existing = None + if status == 200 and isinstance(mappers, list): + for item in mappers: + if isinstance(item, dict) and item.get("name") == email_mapper["name"]: + existing = item + break + if existing and existing.get("id"): + email_mapper["id"] = existing["id"] + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models/{existing['id']}", + access_token, + email_mapper, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected mailu email mapper update response: {status}") + else: + status, _ = http_json( + "POST", + f"{base_url}/admin/realms/{realm}/clients/{client_id}/protocol-mappers/models", + access_token, + email_mapper, + ) + if status not in (201, 204): + raise SystemExit(f"Unexpected mailu email mapper create response: {status}") + # Ensure MFA is on by default for newly-created users. status, required_actions = http_json( "GET", diff --git a/services/logging/opensearch-prune-cronjob.yaml b/services/logging/opensearch-prune-cronjob.yaml index 75e72db..dc0dffb 100644 --- a/services/logging/opensearch-prune-cronjob.yaml +++ b/services/logging/opensearch-prune-cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: logging spec: schedule: "23 3 * * *" + suspend: true concurrencyPolicy: Forbid successfulJobsHistoryLimit: 1 failedJobsHistoryLimit: 3 diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index 5c111eb..7447f24 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -15,7 +15,6 @@ resources: - ingressroute.yaml - mailu-sync-job.yaml - mailu-sync-cronjob.yaml - - mailu-sync-listener.yaml - front-lb.yaml configMapGenerator: @@ -31,10 +30,6 @@ configMapGenerator: - sync.py=scripts/mailu_sync.py options: disableNameSuffixHash: true - - name: mailu-sync-listener - namespace: mailu-mailserver - files: - - listener.py=scripts/mailu_sync_listener.py - name: mailu-vault-entrypoint namespace: mailu-mailserver files: diff --git a/services/maintenance/ariadne-deployment.yaml b/services/maintenance/ariadne-deployment.yaml index 57ce72b..57862ab 100644 --- a/services/maintenance/ariadne-deployment.yaml +++ b/services/maintenance/ariadne-deployment.yaml @@ -23,6 +23,7 @@ spec: vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db" vault.hashicorp.com/agent-inject-template-ariadne-env.sh: | {{ with secret "kv/data/atlas/maintenance/ariadne-db" }} + export ARIADNE_DATABASE_URL="{{ .Data.data.database_url }}" export PORTAL_DATABASE_URL="{{ .Data.data.database_url }}" {{ end }} {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} @@ -57,6 +58,7 @@ spec: export SMTP_USERNAME="no-reply-portal@bstein.dev" export SMTP_PASSWORD="{{ .Data.data.password }}" export SMTP_FROM="no-reply-portal@bstein.dev" + export MAILU_SYSTEM_PASSWORD="{{ .Data.data.password }}" {{ end }} {{ with secret "kv/data/atlas/comms/mas-admin-client-runtime" }} export COMMS_MAS_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" @@ -140,7 +142,11 @@ spec: - name: MAILU_HOST value: mail.bstein.dev - name: MAILU_SYNC_URL - value: http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events + value: http://ariadne.maintenance.svc.cluster.local/events + - name: MAILU_EVENT_MIN_INTERVAL_SEC + value: "10" + - name: MAILU_SYSTEM_USERS + value: no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev - name: MAILU_MAILBOX_WAIT_TIMEOUT_SEC value: "180" - name: MAILU_DB_HOST @@ -263,8 +269,12 @@ spec: value: "30 4 * * *" - name: ARIADNE_SCHEDULE_VAULTWARDEN_SYNC value: "*/15 * * * *" + - name: ARIADNE_SCHEDULE_WGER_USER_SYNC + value: "0 5 * * *" - name: ARIADNE_SCHEDULE_WGER_ADMIN value: "15 3 * * *" + - name: ARIADNE_SCHEDULE_FIREFLY_USER_SYNC + value: "0 6 * * *" - name: ARIADNE_SCHEDULE_FIREFLY_CRON value: "0 3 * * *" - name: ARIADNE_SCHEDULE_POD_CLEANER diff --git a/services/maintenance/image-sweeper-cronjob.yaml b/services/maintenance/image-sweeper-cronjob.yaml index c94fcca..0039206 100644 --- a/services/maintenance/image-sweeper-cronjob.yaml +++ b/services/maintenance/image-sweeper-cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: maintenance spec: schedule: "30 4 * * 0" + suspend: true concurrencyPolicy: Forbid successfulJobsHistoryLimit: 2 failedJobsHistoryLimit: 2 diff --git a/services/maintenance/pod-cleaner-cronjob.yaml b/services/maintenance/pod-cleaner-cronjob.yaml index e083c85..99d13f6 100644 --- a/services/maintenance/pod-cleaner-cronjob.yaml +++ b/services/maintenance/pod-cleaner-cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: maintenance spec: schedule: "0 * * * *" + suspend: true concurrencyPolicy: Forbid successfulJobsHistoryLimit: 1 failedJobsHistoryLimit: 3 diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json index b76f909..207077e 100644 --- a/services/monitoring/dashboards/atlas-testing.json +++ b/services/monitoring/dashboards/atlas-testing.json @@ -322,6 +322,43 @@ } ] }, + { + "id": 12, + "type": "timeseries", + "title": "Ariadne Task Runs vs Errors (1h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 12 + }, + "targets": [ + { + "expr": "sum by (status) (increase(ariadne_task_runs_total[1h]))", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, { "id": 7, "type": "table", @@ -334,7 +371,7 @@ "h": 6, "w": 12, "x": 0, - "y": 12 + "y": 18 }, "targets": [ { @@ -384,7 +421,7 @@ "h": 6, "w": 12, "x": 12, - "y": 12 + "y": 18 }, "targets": [ { @@ -431,10 +468,10 @@ "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 18 + "h": 6, + "w": 12, + "x": 12, + "y": 24 }, "targets": [ { @@ -472,6 +509,56 @@ } ] }, + { + "id": 13, + "type": "table", + "title": "Ariadne Schedule Last Error (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 24 + }, + "targets": [ + { + "expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, { "id": 10, "type": "stat", @@ -484,7 +571,7 @@ "h": 4, "w": 6, "x": 0, - "y": 22 + "y": 30 }, "targets": [ { @@ -547,7 +634,7 @@ "h": 6, "w": 18, "x": 6, - "y": 22 + "y": 30 }, "targets": [ { diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml index 09c29a4..362751b 100644 --- a/services/monitoring/grafana-dashboard-testing.yaml +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -331,6 +331,43 @@ data: } ] }, + { + "id": 12, + "type": "timeseries", + "title": "Ariadne Task Runs vs Errors (1h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 12 + }, + "targets": [ + { + "expr": "sum by (status) (increase(ariadne_task_runs_total[1h]))", + "refId": "A", + "legendFormat": "{{status}}" + } + ], + "fieldConfig": { + "defaults": { + "unit": "none" + }, + "overrides": [] + }, + "options": { + "legend": { + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "multi" + } + } + }, { "id": 7, "type": "table", @@ -343,7 +380,7 @@ data: "h": 6, "w": 12, "x": 0, - "y": 12 + "y": 18 }, "targets": [ { @@ -393,7 +430,7 @@ data: "h": 6, "w": 12, "x": 12, - "y": 12 + "y": 18 }, "targets": [ { @@ -440,10 +477,10 @@ data: "uid": "atlas-vm" }, "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 18 + "h": 6, + "w": 12, + "x": 12, + "y": 24 }, "targets": [ { @@ -481,6 +518,56 @@ data: } ] }, + { + "id": 13, + "type": "table", + "title": "Ariadne Schedule Last Error (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 24 + }, + "targets": [ + { + "expr": "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, { "id": 10, "type": "stat", @@ -493,7 +580,7 @@ data: "h": 4, "w": 6, "x": 0, - "y": 22 + "y": 30 }, "targets": [ { @@ -556,7 +643,7 @@ data: "h": 6, "w": 18, "x": 6, - "y": 22 + "y": 30 }, "targets": [ { diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml index cc0091b..58d8aa1 100644 --- a/services/nextcloud/cronjob.yaml +++ b/services/nextcloud/cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: nextcloud spec: schedule: "*/5 * * * *" + suspend: true concurrencyPolicy: Forbid jobTemplate: spec: diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index d4008c7..177cc02 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -6,6 +6,7 @@ metadata: namespace: nextcloud spec: schedule: "30 4 * * *" + suspend: true concurrencyPolicy: Forbid jobTemplate: spec: