From 166020ca1d12d265d878fee8a92089fa63ee5821 Mon Sep 17 00:00:00 2001 From: Brad Stein Date: Fri, 10 Apr 2026 21:22:35 -0300 Subject: [PATCH] ariadne: migrate glue cronjobs to schedules --- ci/tests/glue/config.yaml | 54 +- ci/tests/glue/test_ariadne_schedules.py | 88 ++++ ci/tests/glue/test_glue_cronjobs.py | 46 -- ci/tests/glue/test_glue_metrics.py | 53 +- scripts/dashboards_render_atlas.py | 128 +++-- services/bstein-dev-home/kustomization.yaml | 7 - .../scripts/vaultwarden_cred_sync.py | 245 --------- .../vaultwarden-cred-sync-cronjob.yaml | 86 ---- services/comms/guest-name-job.yaml | 471 ------------------ services/comms/kustomization.yaml | 4 - services/comms/pin-othrys-job.yaml | 169 ------- services/comms/reset-othrys-room-job.yaml | 312 ------------ services/comms/seed-othrys-room.yaml | 185 ------- services/finance/firefly-cronjob.yaml | 56 --- .../finance/firefly-user-sync-cronjob.yaml | 92 ---- services/finance/kustomization.yaml | 5 - .../finance/scripts/firefly_user_sync.php | 114 ----- services/health/kustomization.yaml | 10 - services/health/scripts/wger_user_sync.py | 120 ----- .../health/wger-admin-ensure-cronjob.yaml | 120 ----- services/health/wger-user-sync-cronjob.yaml | 106 ---- services/mailu/kustomization.yaml | 1 - services/mailu/mailu-sync-cronjob.yaml | 93 ---- services/maintenance/ariadne-deployment.yaml | 4 +- .../monitoring/dashboards/atlas-jobs.json | 30 +- .../monitoring/grafana-dashboard-jobs.yaml | 30 +- services/nextcloud-mail-sync/cronjob.yaml | 113 ----- .../nextcloud-mail-sync/kustomization.yaml | 7 - .../scripts/nextcloud-mail-sync.sh | 235 --------- services/nextcloud/cronjob.yaml | 48 -- services/nextcloud/kustomization.yaml | 8 - services/nextcloud/maintenance-cronjob.yaml | 98 ---- .../scripts/nextcloud-maintenance.sh | 108 ---- services/vault/k8s-auth-config-cronjob.yaml | 55 -- services/vault/kustomization.yaml | 12 - services/vault/oidc-config-cronjob.yaml | 83 --- services/vault/scripts/vault-entrypoint.sh | 34 -- .../vault/scripts/vault_k8s_auth_configure.sh | 259 ---------- .../vault/scripts/vault_oidc_configure.sh | 166 ------ 39 files changed, 283 insertions(+), 3572 deletions(-) create mode 100644 ci/tests/glue/test_ariadne_schedules.py delete mode 100644 ci/tests/glue/test_glue_cronjobs.py delete mode 100644 services/bstein-dev-home/scripts/vaultwarden_cred_sync.py delete mode 100644 services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml delete mode 100644 services/comms/guest-name-job.yaml delete mode 100644 services/comms/pin-othrys-job.yaml delete mode 100644 services/comms/reset-othrys-room-job.yaml delete mode 100644 services/comms/seed-othrys-room.yaml delete mode 100644 services/finance/firefly-cronjob.yaml delete mode 100644 services/finance/firefly-user-sync-cronjob.yaml delete mode 100644 services/finance/scripts/firefly_user_sync.php delete mode 100644 services/health/scripts/wger_user_sync.py delete mode 100644 services/health/wger-admin-ensure-cronjob.yaml delete mode 100644 services/health/wger-user-sync-cronjob.yaml delete mode 100644 services/mailu/mailu-sync-cronjob.yaml delete mode 100644 services/nextcloud-mail-sync/cronjob.yaml delete mode 100755 services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh delete mode 100644 services/nextcloud/cronjob.yaml delete mode 100644 services/nextcloud/maintenance-cronjob.yaml delete mode 100755 services/nextcloud/scripts/nextcloud-maintenance.sh delete mode 100644 services/vault/k8s-auth-config-cronjob.yaml delete mode 100644 services/vault/oidc-config-cronjob.yaml delete mode 100644 services/vault/scripts/vault-entrypoint.sh delete mode 100644 services/vault/scripts/vault_k8s_auth_configure.sh delete mode 100644 services/vault/scripts/vault_oidc_configure.sh diff --git a/ci/tests/glue/config.yaml b/ci/tests/glue/config.yaml index e8c5caf7..5a89ac80 100644 --- a/ci/tests/glue/config.yaml +++ b/ci/tests/glue/config.yaml @@ -1,18 +1,38 @@ -max_success_age_hours: 48 -allow_suspended: - - bstein-dev-home/vaultwarden-cred-sync - - comms/guest-name-randomizer - - comms/othrys-room-reset - - comms/pin-othrys-invite - - comms/seed-othrys-room - - finance/firefly-user-sync - - health/wger-admin-ensure - - health/wger-user-sync - - mailu-mailserver/mailu-sync-nightly - - nextcloud/nextcloud-mail-sync - - vault/vault-oidc-config ariadne_schedule_tasks: - - schedule.mailu_sync - - schedule.nextcloud_sync - - schedule.vaultwarden_sync - - schedule.wger_admin + - task: schedule.mailu_sync + check_last_success: false + - task: schedule.nextcloud_sync + check_last_success: true + max_success_age_hours: 48 + - task: schedule.nextcloud_cron + check_last_success: true + max_success_age_hours: 48 + - task: schedule.nextcloud_maintenance + check_last_success: false + - task: schedule.vaultwarden_sync + check_last_success: true + max_success_age_hours: 48 + - task: schedule.wger_user_sync + check_last_success: true + max_success_age_hours: 48 + - task: schedule.wger_admin + check_last_success: false + - task: schedule.firefly_user_sync + check_last_success: true + max_success_age_hours: 48 + - task: schedule.firefly_cron + check_last_success: false + - task: schedule.vault_k8s_auth + check_last_success: false + - task: schedule.vault_oidc + check_last_success: false + - task: schedule.comms_guest_name + check_last_success: true + max_success_age_hours: 48 + - task: schedule.comms_pin_invite + check_last_success: false + - task: schedule.comms_reset_room + check_last_success: false + - task: schedule.comms_seed_room + check_last_success: true + max_success_age_hours: 48 diff --git a/ci/tests/glue/test_ariadne_schedules.py b/ci/tests/glue/test_ariadne_schedules.py new file mode 100644 index 00000000..c7504ce5 --- /dev/null +++ b/ci/tests/glue/test_ariadne_schedules.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +import os +from datetime import datetime, timezone +from pathlib import Path + +import requests +import yaml + + +CONFIG_PATH = Path(__file__).with_name("config.yaml") + + +def _load_config() -> dict: + with CONFIG_PATH.open("r", encoding="utf-8") as handle: + return yaml.safe_load(handle) or {} + + +def _query(promql: str) -> list[dict]: + vm_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/") + response = requests.get(f"{vm_url}/api/v1/query", params={"query": promql}, timeout=10) + response.raise_for_status() + payload = response.json() + return payload.get("data", {}).get("result", []) + + +def _expected_tasks() -> list[dict]: + cfg = _load_config() + tasks = cfg.get("ariadne_schedule_tasks", []) + assert tasks, "No Ariadne schedule tasks configured" + return tasks + + +def _tracked_tasks(tasks: list[dict]) -> list[dict]: + tracked = [item for item in tasks if item.get("check_last_success")] + assert tracked, "No Ariadne schedule tasks are marked for success tracking" + return tracked + + +def _task_regex(tasks: list[dict]) -> str: + return "|".join(item["task"] for item in tasks) + + +def test_ariadne_schedule_series_exist(): + tasks = _expected_tasks() + selector = _task_regex(tasks) + series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}') + seen = {item.get("metric", {}).get("task") for item in series} + missing = [item["task"] for item in tasks if item["task"] not in seen] + assert not missing, f"Missing next-run metrics for: {', '.join(missing)}" + + +def test_ariadne_schedule_recent_success(): + tasks = _tracked_tasks(_expected_tasks()) + selector = _task_regex(tasks) + series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}') + seen = {item.get("metric", {}).get("task") for item in series} + missing = [item["task"] for item in tasks if item["task"] not in seen] + assert not missing, f"Missing last-success metrics for: {', '.join(missing)}" + + now = datetime.now(timezone.utc) + age_by_task = { + item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600 + for item in series + } + too_old = [ + f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)" + for item in tasks + if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"]) + ] + assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old) + + +def test_ariadne_schedule_last_status_present_and_boolean(): + tasks = _tracked_tasks(_expected_tasks()) + selector = _task_regex(tasks) + series = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}') + seen = {item.get("metric", {}).get("task") for item in series} + missing = [item["task"] for item in tasks if item["task"] not in seen] + assert not missing, f"Missing last-status metrics for: {', '.join(missing)}" + + invalid = [] + for item in series: + task = item.get("metric", {}).get("task") + value = float(item["value"][1]) + if value not in (0.0, 1.0): + invalid.append(f"{task}={value}") + assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}" diff --git a/ci/tests/glue/test_glue_cronjobs.py b/ci/tests/glue/test_glue_cronjobs.py deleted file mode 100644 index ec6b6208..00000000 --- a/ci/tests/glue/test_glue_cronjobs.py +++ /dev/null @@ -1,46 +0,0 @@ -from __future__ import annotations - -from datetime import datetime, timezone -from pathlib import Path - -import yaml -from kubernetes import client, config - - -CONFIG_PATH = Path(__file__).with_name("config.yaml") - - -def _load_config() -> dict: - with CONFIG_PATH.open("r", encoding="utf-8") as handle: - return yaml.safe_load(handle) or {} - - -def _load_kube(): - try: - config.load_incluster_config() - except config.ConfigException: - config.load_kube_config() - - -def test_glue_cronjobs_recent_success(): - cfg = _load_config() - max_age_hours = int(cfg.get("max_success_age_hours", 48)) - allow_suspended = set(cfg.get("allow_suspended", [])) - - _load_kube() - batch = client.BatchV1Api() - cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items - - assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true" - - now = datetime.now(timezone.utc) - for cronjob in cronjobs: - name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}" - if cronjob.spec.suspend: - assert name in allow_suspended, f"{name} is suspended but not in allow_suspended" - continue - - last_success = cronjob.status.last_successful_time - assert last_success is not None, f"{name} has no lastSuccessfulTime" - age_hours = (now - last_success).total_seconds() / 3600 - assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago" diff --git a/ci/tests/glue/test_glue_metrics.py b/ci/tests/glue/test_glue_metrics.py index 52ec0bef..054b9a0a 100644 --- a/ci/tests/glue/test_glue_metrics.py +++ b/ci/tests/glue/test_glue_metrics.py @@ -23,26 +23,45 @@ def _query(promql: str) -> list[dict]: return payload.get("data", {}).get("result", []) -def test_glue_metrics_present(): - series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}') - assert series, "No glue cronjob label series found" +def _expected_tasks() -> list[dict]: + cfg = _load_config() + tasks = cfg.get("ariadne_schedule_tasks", []) + assert tasks, "No Ariadne schedule tasks configured" + return tasks -def test_glue_metrics_success_join(): - query = ( - "kube_cronjob_status_last_successful_time " - 'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}' - ) - series = _query(query) - assert series, "No glue cronjob last success series found" +def _tracked_tasks(tasks: list[dict]) -> list[dict]: + tracked = [item for item in tasks if item.get("check_last_success")] + assert tracked, "No Ariadne schedule tasks are marked for success tracking" + return tracked + + +def _task_regex(tasks: list[dict]) -> str: + return "|".join(item["task"] for item in tasks) def test_ariadne_schedule_metrics_present(): - cfg = _load_config() - expected = cfg.get("ariadne_schedule_tasks", []) - if not expected: - return - series = _query("ariadne_schedule_next_run_timestamp_seconds") - tasks = {item.get("metric", {}).get("task") for item in series} - missing = [task for task in expected if task not in tasks] + tasks = _expected_tasks() + selector = _task_regex(tasks) + series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}') + seen = {item.get("metric", {}).get("task") for item in series} + missing = [item["task"] for item in tasks if item["task"] not in seen] assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}" + + +def test_ariadne_schedule_success_and_status_metrics_present(): + tasks = _tracked_tasks(_expected_tasks()) + selector = _task_regex(tasks) + + success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}') + status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}') + + success_tasks = {item.get("metric", {}).get("task") for item in success} + status_tasks = {item.get("metric", {}).get("task") for item in status} + expected = {item["task"] for item in tasks} + + missing_success = sorted(expected - success_tasks) + missing_status = sorted(expected - status_tasks) + + assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}" + assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}" diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 3cac538e..4755c4b5 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -377,25 +377,75 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"' NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"' NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] -GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"' -GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}" -GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}" -GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})" -GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})" -GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1" -GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})" -GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})" -GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})" -GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600" -GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600" -GLUE_STALE_WINDOW_SEC = 36 * 3600 -GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})" -GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)" -GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})" -GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})" -GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)" -GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)" -GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)" + + +def promql_task_regex(tasks): + """Return a PromQL-safe regex alternation for the provided task names.""" + return "|".join(tasks) + + +ARIADNE_ALL_SCHEDULE_TASKS = [ + "schedule.mailu_sync", + "schedule.nextcloud_sync", + "schedule.nextcloud_cron", + "schedule.nextcloud_maintenance", + "schedule.vaultwarden_sync", + "schedule.wger_user_sync", + "schedule.wger_admin", + "schedule.firefly_user_sync", + "schedule.firefly_cron", + "schedule.vault_k8s_auth", + "schedule.vault_oidc", + "schedule.comms_guest_name", + "schedule.comms_pin_invite", + "schedule.comms_reset_room", + "schedule.comms_seed_room", +] +ARIADNE_FAST_SCHEDULE_TASKS = [ + task + for task in ARIADNE_ALL_SCHEDULE_TASKS + if task not in {"schedule.comms_pin_invite", "schedule.comms_reset_room"} +] +ARIADNE_SCHEDULE_HEALTH_TASKS = [ + "schedule.nextcloud_sync", + "schedule.nextcloud_cron", + "schedule.vaultwarden_sync", + "schedule.wger_user_sync", + "schedule.firefly_user_sync", + "schedule.comms_guest_name", + "schedule.comms_seed_room", +] +ARIADNE_ALL_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_ALL_SCHEDULE_TASKS)})$"' +ARIADNE_FAST_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_FAST_SCHEDULE_TASKS)})$"' +ARIADNE_SCHEDULE_HEALTH_FILTER = f'task=~"^({promql_task_regex(ARIADNE_SCHEDULE_HEALTH_TASKS)})$"' +ARIADNE_ALL_SCHEDULE_NEXT_RUN = f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_ALL_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +) +ARIADNE_ALL_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_ALL_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_ALL_SCHEDULE_FILTER}}}" +ARIADNE_FAST_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +) +ARIADNE_FAST_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +ARIADNE_FAST_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_FAST_SCHEDULE_FILTER}}}" +ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS = ( + f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}" +) +ARIADNE_HEALTH_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}" +ARIADNE_SCHEDULE_LAST_SUCCESS_AGE = f"(time() - {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})" +ARIADNE_SCHEDULE_LAST_ERROR_AGE = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR})" +ARIADNE_SCHEDULE_LAST_SUCCESS_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) / 3600" +ARIADNE_SCHEDULE_LAST_ERROR_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_ERROR_AGE}) / 3600" +ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600 +ARIADNE_SCHEDULE_STALE = f"(({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC})" +ARIADNE_SCHEDULE_MISSING = ( + f"({ARIADNE_ALL_SCHEDULE_NEXT_RUN} unless on(task) {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})" +) +ARIADNE_SCHEDULE_FAILED = f"((1 - {ARIADNE_HEALTH_SCHEDULE_LAST_STATUS}) > bool 0)" +ARIADNE_SCHEDULE_STALE_COUNT = f"sum({ARIADNE_SCHEDULE_STALE}) or on() vector(0)" +ARIADNE_SCHEDULE_MISSING_COUNT = f"count({ARIADNE_SCHEDULE_MISSING}) or on() vector(0)" +ARIADNE_SCHEDULE_FAILED_COUNT = f"sum({ARIADNE_SCHEDULE_FAILED}) or on() vector(0)" ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))' ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))' ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))' @@ -410,14 +460,18 @@ ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="err ARIADNE_TASK_WARNINGS_SERIES = ( 'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)' ) -ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600" -ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600" +ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}) / 3600" +ARIADNE_SCHEDULE_LAST_ERROR_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR}) / 3600" ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = ( - "(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600" + f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600" ) ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = ( - "(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600" + f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_ERROR}[$__range])) / 3600" ) +ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = ( + f"(time() - max_over_time({ARIADNE_FAST_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600" +) +ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS = f"(({ARIADNE_ALL_SCHEDULE_NEXT_RUN} - time()) / 3600)" ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total" PLATFORM_TEST_SUITE_NAMES = [ "ariadne", @@ -593,8 +647,6 @@ ONEOFF_JOB_POD_AGE_HOURS = ( '* on(namespace,pod) group_left(phase) ' 'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})' ) -GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600" -GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600" GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" @@ -2838,8 +2890,8 @@ def build_jobs_dashboard(): panels.append( stat_panel( 4, - "Glue Jobs Stale (>36h)", - GLUE_STALE_COUNT, + "Ariadne Schedules Stale (>36h)", + ARIADNE_SCHEDULE_STALE_COUNT, {"h": 4, "w": 4, "x": 0, "y": 7}, unit="none", thresholds={ @@ -2856,8 +2908,8 @@ def build_jobs_dashboard(): panels.append( stat_panel( 5, - "Glue Jobs Missing Success", - GLUE_MISSING_COUNT, + "Ariadne Schedules Missing Success", + ARIADNE_SCHEDULE_MISSING_COUNT, {"h": 4, "w": 4, "x": 4, "y": 7}, unit="none", ) @@ -2865,8 +2917,8 @@ def build_jobs_dashboard(): panels.append( stat_panel( 6, - "Glue Jobs Suspended", - GLUE_SUSPENDED_COUNT, + "Ariadne Schedules Failed Last Run", + ARIADNE_SCHEDULE_FAILED_COUNT, {"h": 4, "w": 4, "x": 8, "y": 7}, unit="none", ) @@ -2927,12 +2979,12 @@ def build_jobs_dashboard(): panels.append( bargauge_panel( 12, - "Glue Jobs Last Success (hours ago)", - GLUE_LAST_SUCCESS_RANGE_HOURS, + "Ariadne Fast Schedule Last Success (hours ago)", + ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS, {"h": 6, "w": 12, "x": 0, "y": 23}, unit="h", instant=True, - legend="{{namespace}}/{{cronjob}}", + legend="{{task}}", thresholds=age_thresholds, decimals=2, ) @@ -2940,12 +2992,12 @@ def build_jobs_dashboard(): panels.append( bargauge_panel( 13, - "Glue Jobs Last Schedule (hours ago)", - GLUE_LAST_SCHEDULE_RANGE_HOURS, + "Ariadne Fast Schedule Next Run (hours from now)", + ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS, {"h": 6, "w": 12, "x": 12, "y": 23}, unit="h", instant=True, - legend="{{namespace}}/{{cronjob}}", + legend="{{task}}", thresholds=age_thresholds, decimals=2, ) @@ -3045,7 +3097,7 @@ def build_jobs_dashboard(): "annotations": {"list": []}, "schemaVersion": 39, "style": "dark", - "tags": ["atlas", "jobs", "glue"], + "tags": ["atlas", "jobs", "ariadne"], } diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 18134538..b3a52194 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -15,7 +15,6 @@ resources: - frontend-service.yaml - backend-deployment.yaml - backend-service.yaml - - vaultwarden-cred-sync-cronjob.yaml - oneoffs/portal-onboarding-e2e-test-job.yaml - ingress.yaml images: @@ -30,12 +29,6 @@ configMapGenerator: - gateway.py=scripts/gateway.py options: disableNameSuffixHash: true - - name: vaultwarden-cred-sync-script - namespace: bstein-dev-home - files: - - vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py - options: - disableNameSuffixHash: true - name: portal-onboarding-e2e-tests namespace: bstein-dev-home files: diff --git a/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py b/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py deleted file mode 100644 index cb4f9c83..00000000 --- a/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import os -import sys -import time -from datetime import datetime, timezone -from typing import Any, Iterable - -import httpx - -from atlas_portal import settings -from atlas_portal.keycloak import admin_client -from atlas_portal.vaultwarden import invite_user - - -VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email" -VAULTWARDEN_STATUS_ATTR = "vaultwarden_status" -VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at" -VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800")) -VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2")) - - -def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: - client = admin_client() - if not client.ready(): - raise RuntimeError("keycloak admin client not configured") - - url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" - first = 0 - while True: - headers = _headers_with_retry(client) - # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a - # brief representation which may omit these. - params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"} - payload = None - for attempt in range(1, 6): - try: - with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: - resp = http.get(url, params=params, headers=headers) - resp.raise_for_status() - payload = resp.json() - break - except httpx.HTTPError as exc: - if attempt == 5: - raise - time.sleep(attempt * 2) - - if not isinstance(payload, list) or not payload: - return - - for item in payload: - if isinstance(item, dict): - yield item - - if len(payload) < page_size: - return - first += page_size - - -def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]: - last_exc: Exception | None = None - for attempt in range(1, attempts + 1): - try: - return client.headers() - except Exception as exc: - last_exc = exc - time.sleep(attempt * 2) - if last_exc: - raise last_exc - raise RuntimeError("failed to fetch keycloak headers") - - -def _extract_attr(attrs: Any, key: str) -> str: - if not isinstance(attrs, dict): - return "" - raw = attrs.get(key) - if isinstance(raw, list): - for item in raw: - if isinstance(item, str) and item.strip(): - return item.strip() - return "" - if isinstance(raw, str) and raw.strip(): - return raw.strip() - return "" - - -def _parse_synced_at(value: str) -> float | None: - value = (value or "").strip() - if not value: - return None - for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"): - try: - parsed = datetime.strptime(value, fmt) - if parsed.tzinfo is None: - parsed = parsed.replace(tzinfo=timezone.utc) - return parsed.timestamp() - except ValueError: - continue - return None - - -def _vaultwarden_email_for_user(user: dict[str, Any]) -> str: - username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" - username = username.strip() - if not username: - return "" - - attrs = user.get("attributes") - vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR) - if vaultwarden_email: - return vaultwarden_email - - mailu_email = _extract_attr(attrs, "mailu_email") - if mailu_email: - return mailu_email - - email = (user.get("email") if isinstance(user.get("email"), str) else "") or "" - email = email.strip() - if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"): - return email - - # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email. - # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient). - return "" - - -def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None: - value = (value or "").strip() - if not value: - return - existing = _extract_attr(user.get("attributes"), key) - if existing: - return - admin_client().set_user_attribute(username, key, value) - - -def _set_user_attribute(username: str, key: str, value: str) -> None: - value = (value or "").strip() - if not value: - return - admin_client().set_user_attribute(username, key, value) - - -def main() -> int: - processed = 0 - created = 0 - skipped = 0 - failures = 0 - consecutive_failures = 0 - - for user in _iter_keycloak_users(): - username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" - username = username.strip() - if not username: - skipped += 1 - continue - - enabled = user.get("enabled") - if enabled is False: - skipped += 1 - continue - - if user.get("serviceAccountClientId") or username.startswith("service-account-"): - skipped += 1 - continue - - # Fetch the full user payload so we can reliably read attributes (and skip re-invites). - user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or "" - user_id = user_id.strip() - full_user = user - if user_id: - try: - full_user = admin_client().get_user(user_id) - except Exception: - full_user = user - - current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR) - current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR) - current_synced_ts = _parse_synced_at(current_synced_at) - if current_status in {"rate_limited", "error"} and current_synced_ts: - if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC: - skipped += 1 - continue - email = _vaultwarden_email_for_user(full_user) - if not email: - print(f"skip {username}: missing email", file=sys.stderr) - skipped += 1 - continue - - try: - _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email) - except Exception: - pass - - # If we've already successfully invited or confirmed presence, do not re-invite on every cron run. - # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits. - if current_status in {"invited", "already_present"}: - if not current_synced_at: - try: - _set_user_attribute( - username, - VAULTWARDEN_SYNCED_AT_ATTR, - time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - ) - except Exception: - pass - skipped += 1 - continue - - processed += 1 - result = invite_user(email) - if result.ok: - created += 1 - consecutive_failures = 0 - print(f"ok {username}: {result.status}") - try: - _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) - _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) - except Exception: - pass - else: - failures += 1 - if result.status in {"rate_limited", "error"}: - consecutive_failures += 1 - print(f"err {username}: {result.status} {result.detail}", file=sys.stderr) - try: - _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) - _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) - except Exception: - pass - if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT: - print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr) - break - - print( - f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}", - file=sys.stderr, - ) - return 0 if failures == 0 else 2 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml deleted file mode 100644 index acd851b1..00000000 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ /dev/null @@ -1,86 +0,0 @@ -# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: vaultwarden-cred-sync - namespace: bstein-dev-home - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/15 * * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "bstein-dev-home" - vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db" - vault.hashicorp.com/agent-inject-template-portal-env.sh: | - {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} - export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" - {{ end }} - {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} - export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }} - export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}" - export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} - export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" - export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" - {{ end }} - spec: - serviceAccountName: bstein-dev-home - restartPolicy: Never - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - imagePullSecrets: - - name: harbor-regcred - containers: - - name: sync - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95 - imagePullPolicy: Always - command: ["/bin/sh", "-c"] - args: - - >- - . /vault/secrets/portal-env.sh - && exec python /scripts/vaultwarden_cred_sync.py - env: - - name: PYTHONPATH - value: /app - - name: KEYCLOAK_ENABLED - value: "true" - - name: KEYCLOAK_REALM - value: atlas - - name: KEYCLOAK_ADMIN_URL - value: http://keycloak.sso.svc.cluster.local - - name: KEYCLOAK_ADMIN_REALM - value: atlas - - name: KEYCLOAK_ADMIN_CLIENT_ID - value: bstein-dev-home-admin - - name: HTTP_CHECK_TIMEOUT_SEC - value: "20" - - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC - value: "900" - - name: VAULTWARDEN_RETRY_COOLDOWN_SEC - value: "1800" - - name: VAULTWARDEN_FAILURE_BAILOUT - value: "2" - volumeMounts: - - name: vaultwarden-cred-sync-script - mountPath: /scripts - readOnly: true - volumes: - - name: vaultwarden-cred-sync-script - configMap: - name: vaultwarden-cred-sync-script - defaultMode: 0555 diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml deleted file mode 100644 index 3eae2dd2..00000000 --- a/services/comms/guest-name-job.yaml +++ /dev/null @@ -1,471 +0,0 @@ -# services/comms/guest-name-job.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: guest-name-randomizer - namespace: comms - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/1 * * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 1 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "comms" - vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" - vault.hashicorp.com/agent-inject-template-turn-secret: | - {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" - vault.hashicorp.com/agent-inject-template-livekit-primary: | - {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-bot-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-seeder-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-matrix: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-homepage: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" - vault.hashicorp.com/agent-inject-template-mas-admin-secret: | - {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" - vault.hashicorp.com/agent-inject-template-synapse-db-pass: | - {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" - vault.hashicorp.com/agent-inject-template-mas-db-pass: | - {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-kc-secret: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} - spec: - restartPolicy: Never - serviceAccountName: comms-vault - nodeSelector: - hardware: rpi5 - volumes: - - name: vault-scripts - configMap: - name: comms-vault-env - defaultMode: 0555 - containers: - - name: rename - image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0 - volumeMounts: - - name: vault-scripts - mountPath: /vault/scripts - readOnly: true - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: MAS_ADMIN_CLIENT_ID - value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /vault/secrets/mas-admin-secret - - name: MAS_ADMIN_API_BASE - value: http://matrix-authentication-service:8081/api/admin/v1 - - name: MAS_TOKEN_URL - value: http://matrix-authentication-service:8080/oauth2/token - - name: SEEDER_USER - value: othrys-seeder - - name: PGHOST - value: postgres-service.postgres.svc.cluster.local - - name: PGPORT - value: "5432" - - name: PGDATABASE - value: synapse - - name: PGUSER - value: synapse - command: - - /bin/sh - - -c - - | - set -euo pipefail - . /vault/scripts/comms_vault_env.sh - python - <<'PY' - import base64 - import os - import random - import requests - import time - import urllib.parse - import psycopg2 - - ADJ = [ - "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty", - "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow", - "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind", - ] - NOUN = [ - "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit", - "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak", - "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr", - ] - - BASE = os.environ["SYNAPSE_BASE"] - MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"] - MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"] - MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/") - MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"] - SEEDER_USER = os.environ["SEEDER_USER"] - ROOM_ALIAS = "#othrys:live.bstein.dev" - SERVER_NAME = "live.bstein.dev" - STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000 - - def mas_admin_token(): - with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: - secret = f.read().strip() - basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode() - last_err = None - for attempt in range(5): - try: - r = requests.post( - MAS_TOKEN_URL, - headers={"Authorization": f"Basic {basic}"}, - data={"grant_type": "client_credentials", "scope": "urn:mas:admin"}, - timeout=30, - ) - r.raise_for_status() - return r.json()["access_token"] - except Exception as exc: # noqa: BLE001 - last_err = exc - time.sleep(2 ** attempt) - raise last_err - - def mas_user_id(token, username): - r = requests.get( - f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}", - headers={"Authorization": f"Bearer {token}"}, - timeout=30, - ) - r.raise_for_status() - return r.json()["data"]["id"] - - def mas_personal_session(token, user_id): - r = requests.post( - f"{MAS_ADMIN_API_BASE}/personal-sessions", - headers={"Authorization": f"Bearer {token}"}, - json={ - "actor_user_id": user_id, - "human_name": "guest-name-randomizer", - "scope": "urn:matrix:client:api:*", - "expires_in": 300, - }, - timeout=30, - ) - r.raise_for_status() - data = r.json().get("data", {}).get("attributes", {}) or {} - return data["access_token"], r.json()["data"]["id"] - - def mas_revoke_session(token, session_id): - requests.post( - f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke", - headers={"Authorization": f"Bearer {token}"}, - json={}, - timeout=30, - ) - - def resolve_alias(token, alias): - headers = {"Authorization": f"Bearer {token}"} - enc = urllib.parse.quote(alias) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=headers) - r.raise_for_status() - return r.json()["room_id"] - - def room_members(token, room_id): - headers = {"Authorization": f"Bearer {token}"} - r = requests.get(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members", headers=headers) - r.raise_for_status() - members = set() - existing_names = set() - for ev in r.json().get("chunk", []): - user_id = ev.get("state_key") - if user_id: - members.add(user_id) - disp = (ev.get("content") or {}).get("displayname") - if disp: - existing_names.add(disp) - return members, existing_names - - def mas_list_users(token): - headers = {"Authorization": f"Bearer {token}"} - users = [] - cursor = None - while True: - url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100" - if cursor: - url += f"&page[after]={urllib.parse.quote(cursor)}" - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - data = r.json().get("data", []) - if not data: - break - users.extend(data) - cursor = data[-1].get("meta", {}).get("page", {}).get("cursor") - if not cursor: - break - return users - - def synapse_list_users(token): - headers = {"Authorization": f"Bearer {token}"} - users = [] - from_token = None - while True: - url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" - if from_token: - url += f"&from={urllib.parse.quote(from_token)}" - r = requests.get(url, headers=headers, timeout=30) - r.raise_for_status() - payload = r.json() - users.extend(payload.get("users", [])) - from_token = payload.get("next_token") - if not from_token: - break - return users - - def should_prune_guest(entry, now_ms): - if not entry.get("is_guest"): - return False - last_seen = entry.get("last_seen_ts") - if last_seen is None: - return False - try: - last_seen = int(last_seen) - except (TypeError, ValueError): - return False - return now_ms - last_seen > STALE_GUEST_MS - - def prune_guest(token, user_id): - headers = {"Authorization": f"Bearer {token}"} - try: - r = requests.delete( - f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", - headers=headers, - params={"erase": "true"}, - timeout=30, - ) - except Exception as exc: # noqa: BLE001 - print(f"guest prune failed for {user_id}: {exc}") - return False - if r.status_code in (200, 202, 204, 404): - return True - print(f"guest prune failed for {user_id}: {r.status_code} {r.text}") - return False - - def user_id_for_username(username): - return f"@{username}:live.bstein.dev" - - def get_displayname(token, user_id): - headers = {"Authorization": f"Bearer {token}"} - r = requests.get(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}", headers=headers) - r.raise_for_status() - return r.json().get("displayname") - - def get_displayname_admin(token, user_id): - headers = {"Authorization": f"Bearer {token}"} - r = requests.get( - f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", - headers=headers, - timeout=30, - ) - if r.status_code == 404: - return None - r.raise_for_status() - return r.json().get("displayname") - - def set_displayname(token, room_id, user_id, name, in_room): - headers = {"Authorization": f"Bearer {token}"} - payload = {"displayname": name} - r = requests.put( - f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname", - headers=headers, - json=payload, - ) - r.raise_for_status() - if not in_room: - return - state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}" - content = {"membership": "join", "displayname": name} - requests.put(state_url, headers=headers, json=content, timeout=30) - - def set_displayname_admin(token, user_id, name): - headers = {"Authorization": f"Bearer {token}"} - payload = {"displayname": name} - r = requests.put( - f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", - headers=headers, - json=payload, - timeout=30, - ) - if r.status_code in (200, 201, 204): - return True - return False - - def needs_rename_username(username): - return username.isdigit() or username.startswith("guest-") - - def needs_rename_display(display): - return not display or display.isdigit() or display.startswith("guest-") - - def db_rename_numeric(existing_names): - profile_rows = [] - profile_index = {} - users = [] - conn = psycopg2.connect( - host=os.environ["PGHOST"], - port=int(os.environ["PGPORT"]), - dbname=os.environ["PGDATABASE"], - user=os.environ["PGUSER"], - password=os.environ["PGPASSWORD"], - ) - try: - with conn: - with conn.cursor() as cur: - cur.execute( - "SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s", - (f"^@\\d+:{SERVER_NAME}$",), - ) - profile_rows = cur.fetchall() - profile_index = {row[1]: row for row in profile_rows} - for user_id, full_user_id, display in profile_rows: - if display and not needs_rename_display(display): - continue - new = None - for _ in range(30): - candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - if candidate not in existing_names: - new = candidate - existing_names.add(candidate) - break - if not new: - continue - cur.execute( - "UPDATE profiles SET displayname = %s WHERE full_user_id = %s", - (new, full_user_id), - ) - - cur.execute( - "SELECT name FROM users WHERE name ~ %s", - (f"^@\\d+:{SERVER_NAME}$",), - ) - users = [row[0] for row in cur.fetchall()] - if not users: - return - cur.execute( - "SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)", - (users,), - ) - for existing_full in cur.fetchall(): - profile_index.setdefault(existing_full[1], existing_full) - - for full_user_id in users: - if full_user_id in profile_index: - continue - localpart = full_user_id.split(":", 1)[0].lstrip("@") - new = None - for _ in range(30): - candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - if candidate not in existing_names: - new = candidate - existing_names.add(candidate) - break - if not new: - continue - cur.execute( - "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) " - "ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname", - (localpart, new, full_user_id), - ) - finally: - conn.close() - - admin_token = mas_admin_token() - seeder_id = mas_user_id(admin_token, SEEDER_USER) - seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id) - try: - room_id = resolve_alias(seeder_token, ROOM_ALIAS) - members, existing = room_members(seeder_token, room_id) - users = mas_list_users(admin_token) - mas_usernames = set() - for user in users: - attrs = user.get("attributes") or {} - username = attrs.get("username") or "" - if username: - mas_usernames.add(username) - legacy_guest = attrs.get("legacy_guest") - if not username: - continue - if not (legacy_guest or needs_rename_username(username)): - continue - user_id = user_id_for_username(username) - access_token, session_id = mas_personal_session(admin_token, user["id"]) - try: - display = get_displayname(access_token, user_id) - if display and not needs_rename_display(display): - continue - new = None - for _ in range(30): - candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - if candidate not in existing: - new = candidate - existing.add(candidate) - break - if not new: - continue - set_displayname(access_token, room_id, user_id, new, user_id in members) - finally: - mas_revoke_session(admin_token, session_id) - - try: - entries = synapse_list_users(seeder_token) - except Exception as exc: # noqa: BLE001 - print(f"synapse admin list skipped: {exc}") - entries = [] - now_ms = int(time.time() * 1000) - for entry in entries: - user_id = entry.get("name") or "" - if not user_id.startswith("@"): - continue - localpart = user_id.split(":", 1)[0].lstrip("@") - if localpart in mas_usernames: - continue - is_guest = entry.get("is_guest") - if is_guest and should_prune_guest(entry, now_ms): - if prune_guest(seeder_token, user_id): - continue - if not (is_guest or needs_rename_username(localpart)): - continue - display = get_displayname_admin(seeder_token, user_id) - if display and not needs_rename_display(display): - continue - new = None - for _ in range(30): - candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}" - if candidate not in existing: - new = candidate - existing.add(candidate) - break - if not new: - continue - if not set_displayname_admin(seeder_token, user_id, new): - continue - db_rename_numeric(existing) - finally: - mas_revoke_session(admin_token, seeder_session) - PY diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 969ca586..db763c23 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -34,11 +34,7 @@ resources: - livekit-token-deployment.yaml - livekit.yaml - coturn.yaml - - seed-othrys-room.yaml - - guest-name-job.yaml - oneoffs/othrys-kick-numeric-job.yaml - - pin-othrys-job.yaml - - reset-othrys-room-job.yaml - oneoffs/bstein-force-leave-job.yaml - livekit-ingress.yaml - livekit-middlewares.yaml diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml deleted file mode 100644 index 2b29ca39..00000000 --- a/services/comms/pin-othrys-job.yaml +++ /dev/null @@ -1,169 +0,0 @@ -# services/comms/pin-othrys-job.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: pin-othrys-invite - namespace: comms - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/30 * * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 1 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "comms" - vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" - vault.hashicorp.com/agent-inject-template-turn-secret: | - {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" - vault.hashicorp.com/agent-inject-template-livekit-primary: | - {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-bot-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-seeder-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-matrix: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-homepage: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" - vault.hashicorp.com/agent-inject-template-mas-admin-secret: | - {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" - vault.hashicorp.com/agent-inject-template-synapse-db-pass: | - {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" - vault.hashicorp.com/agent-inject-template-mas-db-pass: | - {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-kc-secret: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} - spec: - restartPolicy: Never - serviceAccountName: comms-vault - containers: - - name: pin - image: python:3.11-slim - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: SEEDER_USER - value: othrys-seeder - command: - - /bin/sh - - -c - - | - set -euo pipefail - . /vault/scripts/comms_vault_env.sh - pip install --no-cache-dir requests >/dev/null - python - <<'PY' - import os, requests, urllib.parse - - BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) - ROOM_ALIAS = "#othrys:live.bstein.dev" - MESSAGE = ( - "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join " - "and choose 'Continue' -> 'Join as guest'." - ) - - def auth(token): return {"Authorization": f"Bearer {token}"} - - def canon_user(user): - u = (user or "").strip() - if u.startswith("@") and ":" in u: - return u - u = u.lstrip("@") - if ":" in u: - return f"@{u}" - return f"@{u}:live.bstein.dev" - - def login(user, password): - r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": canon_user(user)}, - "password": password, - }) - r.raise_for_status() - return r.json()["access_token"] - - def resolve(alias, token): - enc = urllib.parse.quote(alias) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) - r.raise_for_status() - return r.json()["room_id"] - - def get_pinned(room_id, token): - r = requests.get( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", - headers=auth(token), - ) - if r.status_code == 404: - return [] - r.raise_for_status() - return r.json().get("pinned", []) - - def get_event(room_id, event_id, token): - r = requests.get( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}", - headers=auth(token), - ) - if r.status_code == 404: - return None - r.raise_for_status() - return r.json() - - def send(room_id, token, body): - r = requests.post( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", - headers=auth(token), - json={"msgtype": "m.text", "body": body}, - ) - r.raise_for_status() - return r.json()["event_id"] - - def pin(room_id, token, event_id): - r = requests.put( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events", - headers=auth(token), - json={"pinned": [event_id]}, - ) - r.raise_for_status() - - token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) - room_id = resolve(ROOM_ALIAS, token) - for event_id in get_pinned(room_id, token): - ev = get_event(room_id, event_id, token) - if ev and ev.get("content", {}).get("body") == MESSAGE: - raise SystemExit(0) - - eid = send(room_id, token, MESSAGE) - pin(room_id, token, eid) - PY - volumeMounts: - - name: vault-scripts - mountPath: /vault/scripts - readOnly: true - volumes: - - name: vault-scripts - configMap: - name: comms-vault-env - defaultMode: 0555 diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml deleted file mode 100644 index ae8585a7..00000000 --- a/services/comms/reset-othrys-room-job.yaml +++ /dev/null @@ -1,312 +0,0 @@ -# services/comms/reset-othrys-room-job.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: othrys-room-reset - namespace: comms - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "0 0 1 1 *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 1 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "comms" - vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" - vault.hashicorp.com/agent-inject-template-turn-secret: | - {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" - vault.hashicorp.com/agent-inject-template-livekit-primary: | - {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-bot-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-seeder-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-matrix: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-homepage: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" - vault.hashicorp.com/agent-inject-template-mas-admin-secret: | - {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" - vault.hashicorp.com/agent-inject-template-synapse-db-pass: | - {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" - vault.hashicorp.com/agent-inject-template-mas-db-pass: | - {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-kc-secret: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} - spec: - restartPolicy: Never - serviceAccountName: comms-vault - containers: - - name: reset - image: python:3.11-slim - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: SERVER_NAME - value: live.bstein.dev - - name: ROOM_ALIAS - value: "#othrys:live.bstein.dev" - - name: ROOM_NAME - value: Othrys - - name: PIN_MESSAGE - value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." - - name: SEEDER_USER - value: othrys-seeder - - name: BOT_USER - value: atlasbot - command: - - /bin/sh - - -c - - | - set -euo pipefail - . /vault/scripts/comms_vault_env.sh - pip install --no-cache-dir requests >/dev/null - python - <<'PY' - import os - import time - import urllib.parse - import requests - - BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) - SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev") - ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev") - ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys") - PIN_MESSAGE = os.environ["PIN_MESSAGE"] - SEEDER_USER = os.environ["SEEDER_USER"] - SEEDER_PASS = os.environ["SEEDER_PASS"] - BOT_USER = os.environ["BOT_USER"] - - POWER_LEVELS = { - "ban": 50, - "events": { - "m.room.avatar": 50, - "m.room.canonical_alias": 50, - "m.room.encryption": 100, - "m.room.history_visibility": 100, - "m.room.name": 50, - "m.room.power_levels": 100, - "m.room.server_acl": 100, - "m.room.tombstone": 100, - }, - "events_default": 0, - "historical": 100, - "invite": 50, - "kick": 50, - "m.call.invite": 50, - "redact": 50, - "state_default": 50, - "users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100}, - "users_default": 0, - } - - def auth(token): - return {"Authorization": f"Bearer {token}"} - - def canon_user(user): - u = (user or "").strip() - if u.startswith("@") and ":" in u: - return u - u = u.lstrip("@") - if ":" in u: - return f"@{u}" - return f"@{u}:{SERVER_NAME}" - - def login(user, password): - r = requests.post( - f"{AUTH_BASE}/_matrix/client/v3/login", - json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": canon_user(user)}, - "password": password, - }, - ) - if r.status_code != 200: - raise SystemExit(f"login failed: {r.status_code} {r.text}") - return r.json()["access_token"] - - def resolve_alias(token, alias): - enc = urllib.parse.quote(alias) - r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) - if r.status_code == 404: - return None - r.raise_for_status() - return r.json()["room_id"] - - def create_room(token): - r = requests.post( - f"{BASE}/_matrix/client/v3/createRoom", - headers=auth(token), - json={ - "preset": "public_chat", - "name": ROOM_NAME, - "room_version": "11", - }, - ) - r.raise_for_status() - return r.json()["room_id"] - - def put_state(token, room_id, ev_type, content): - r = requests.put( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}", - headers=auth(token), - json=content, - ) - r.raise_for_status() - - def set_directory_visibility(token, room_id, visibility): - r = requests.put( - f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}", - headers=auth(token), - json={"visibility": visibility}, - ) - r.raise_for_status() - - def delete_alias(token, alias): - enc = urllib.parse.quote(alias) - r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token)) - if r.status_code in (200, 202, 404): - return - r.raise_for_status() - - def put_alias(token, alias, room_id): - enc = urllib.parse.quote(alias) - r = requests.put( - f"{BASE}/_matrix/client/v3/directory/room/{enc}", - headers=auth(token), - json={"room_id": room_id}, - ) - r.raise_for_status() - - def list_joined_members(token, room_id): - r = requests.get( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join", - headers=auth(token), - ) - r.raise_for_status() - members = [] - for ev in r.json().get("chunk", []): - if ev.get("type") != "m.room.member": - continue - uid = ev.get("state_key") - if not isinstance(uid, str) or not uid.startswith("@"): - continue - members.append(uid) - return members - - def invite_user(token, room_id, user_id): - r = requests.post( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite", - headers=auth(token), - json={"user_id": user_id}, - ) - if r.status_code in (200, 202): - return - r.raise_for_status() - - def send_message(token, room_id, body): - r = requests.post( - f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message", - headers=auth(token), - json={"msgtype": "m.text", "body": body}, - ) - r.raise_for_status() - return r.json()["event_id"] - - def login_with_retry(): - last = None - for attempt in range(1, 6): - try: - return login(SEEDER_USER, SEEDER_PASS) - except Exception as exc: # noqa: BLE001 - last = exc - time.sleep(attempt * 2) - raise last - - token = login_with_retry() - - old_room_id = resolve_alias(token, ROOM_ALIAS) - if not old_room_id: - raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed") - - new_room_id = create_room(token) - - # Configure the new room. - put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"}) - put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"}) - put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"}) - put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS) - - # Move the alias. - delete_alias(token, ROOM_ALIAS) - put_alias(token, ROOM_ALIAS, new_room_id) - put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS}) - - set_directory_visibility(token, new_room_id, "public") - - # Invite the bot and all joined members of the old room. - bot_user_id = f"@{BOT_USER}:{SERVER_NAME}" - invite_user(token, new_room_id, bot_user_id) - for uid in list_joined_members(token, old_room_id): - if uid == f"@{SEEDER_USER}:{SERVER_NAME}": - continue - localpart = uid.split(":", 1)[0].lstrip("@") - if localpart.isdigit(): - continue - invite_user(token, new_room_id, uid) - - # Pin the guest invite message in the new room. - event_id = send_message(token, new_room_id, PIN_MESSAGE) - put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]}) - - # De-list and tombstone the old room. - set_directory_visibility(token, old_room_id, "private") - put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"}) - put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"}) - put_state( - token, - old_room_id, - "m.room.tombstone", - {"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id}, - ) - send_message( - token, - old_room_id, - "Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join", - ) - - print(f"old_room_id={old_room_id}") - print(f"new_room_id={new_room_id}") - PY - volumeMounts: - - name: vault-scripts - mountPath: /vault/scripts - readOnly: true - volumes: - - name: vault-scripts - configMap: - name: comms-vault-env - defaultMode: 0555 diff --git a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml deleted file mode 100644 index 804d330c..00000000 --- a/services/comms/seed-othrys-room.yaml +++ /dev/null @@ -1,185 +0,0 @@ -# services/comms/seed-othrys-room.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: seed-othrys-room - namespace: comms - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/10 * * * *" - suspend: true - concurrencyPolicy: Forbid - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "comms" - vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" - vault.hashicorp.com/agent-inject-template-turn-secret: | - {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" - vault.hashicorp.com/agent-inject-template-livekit-primary: | - {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-bot-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" - vault.hashicorp.com/agent-inject-template-seeder-pass: | - {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-matrix: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" - vault.hashicorp.com/agent-inject-template-chat-homepage: | - {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" - vault.hashicorp.com/agent-inject-template-mas-admin-secret: | - {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" - vault.hashicorp.com/agent-inject-template-synapse-db-pass: | - {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" - vault.hashicorp.com/agent-inject-template-mas-db-pass: | - {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" - vault.hashicorp.com/agent-inject-template-mas-kc-secret: | - {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} - spec: - restartPolicy: Never - serviceAccountName: comms-vault - containers: - - name: seed - image: python:3.11-slim - env: - - name: SYNAPSE_BASE - value: http://othrys-synapse-matrix-synapse:8008 - - name: AUTH_BASE - value: http://matrix-authentication-service:8080 - - name: SEEDER_USER - value: othrys-seeder - - name: BOT_USER - value: atlasbot - command: - - /bin/sh - - -c - - | - set -euo pipefail - . /vault/scripts/comms_vault_env.sh - pip install --no-cache-dir requests pyyaml >/dev/null - python - <<'PY' - import os, requests, urllib.parse - - BASE = os.environ["SYNAPSE_BASE"] - AUTH_BASE = os.environ.get("AUTH_BASE", BASE) - - def canon_user(user): - u = (user or "").strip() - if u.startswith("@") and ":" in u: - return u - u = u.lstrip("@") - if ":" in u: - return f"@{u}" - return f"@{u}:live.bstein.dev" - - def login(user, password): - r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={ - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": canon_user(user)}, - "password": password, - }) - if r.status_code != 200: - raise SystemExit(f"login failed: {r.status_code} {r.text}") - return r.json()["access_token"] - - def ensure_user(token, localpart, password, admin): - headers = {"Authorization": f"Bearer {token}"} - user_id = f"@{localpart}:live.bstein.dev" - url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}" - res = requests.get(url, headers=headers) - if res.status_code == 200: - return - payload = {"password": password, "admin": admin, "deactivated": False} - create = requests.put(url, headers=headers, json=payload) - if create.status_code not in (200, 201): - raise SystemExit(f"create user {user_id} failed: {create.status_code} {create.text}") - - def ensure_room(token): - headers = {"Authorization": f"Bearer {token}"} - alias = "#othrys:live.bstein.dev" - alias_enc = "%23othrys%3Alive.bstein.dev" - exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers) - if exists.status_code == 200: - room_id = exists.json()["room_id"] - else: - create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={ - "preset": "public_chat", - "name": "Othrys", - "room_alias_name": "othrys", - "initial_state": [], - "power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50}, - }) - if create.status_code not in (200, 409): - raise SystemExit(f"create room failed: {create.status_code} {create.text}") - exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers) - room_id = exists.json()["room_id"] - state_events = [ - ("m.room.join_rules", {"join_rule": "public"}), - ("m.room.guest_access", {"guest_access": "can_join"}), - ("m.room.history_visibility", {"history_visibility": "shared"}), - ("m.room.canonical_alias", {"alias": alias}), - ] - for ev_type, content in state_events: - requests.put(f"{BASE}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}", headers=headers, json=content) - requests.put(f"{BASE}/_matrix/client/v3/directory/list/room/{room_id}", headers=headers, json={"visibility": "public"}) - return room_id - - def join_user(token, room_id, user_id): - headers = {"Authorization": f"Bearer {token}"} - requests.post(f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", headers=headers, json={"user_id": user_id}) - - def join_all_locals(token, room_id): - headers = {"Authorization": f"Bearer {token}"} - users = [] - from_token = None - while True: - url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100" - if from_token: - url += f"&from={from_token}" - res = requests.get(url, headers=headers).json() - users.extend([u["name"] for u in res.get("users", [])]) - from_token = res.get("next_token") - if not from_token: - break - for uid in users: - join_user(token, room_id, uid) - - token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) - ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True) - ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False) - room_id = ensure_room(token) - join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev") - join_all_locals(token, room_id) - PY - volumeMounts: - - name: synapse-config - mountPath: /config - readOnly: true - - name: vault-scripts - mountPath: /vault/scripts - readOnly: true - volumes: - - name: synapse-config - secret: - secretName: othrys-synapse-matrix-synapse - - name: vault-scripts - configMap: - name: comms-vault-env - defaultMode: 0555 diff --git a/services/finance/firefly-cronjob.yaml b/services/finance/firefly-cronjob.yaml deleted file mode 100644 index 9e5c8522..00000000 --- a/services/finance/firefly-cronjob.yaml +++ /dev/null @@ -1,56 +0,0 @@ -# services/finance/firefly-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: firefly-cron - namespace: finance -spec: - schedule: "0 3 * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 1 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "finance" - vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets" - vault.hashicorp.com/agent-inject-template-firefly-cron-token: | - {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} - {{ .Data.data.STATIC_CRON_TOKEN }} - {{- end -}} - spec: - serviceAccountName: finance-vault - restartPolicy: Never - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5"] - - weight: 70 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi4"] - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: cron - image: curlimages/curl:8.5.0 - command: ["/bin/sh", "-c"] - args: - - | - set -eu - token="$(cat /vault/secrets/firefly-cron-token)" - curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}" diff --git a/services/finance/firefly-user-sync-cronjob.yaml b/services/finance/firefly-user-sync-cronjob.yaml deleted file mode 100644 index aeadfadc..00000000 --- a/services/finance/firefly-user-sync-cronjob.yaml +++ /dev/null @@ -1,92 +0,0 @@ -# services/finance/firefly-user-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: firefly-user-sync - namespace: finance - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "0 6 * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "finance" - vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db" - vault.hashicorp.com/agent-inject-template-firefly-env.sh: | - {{ with secret "kv/data/atlas/finance/firefly-db" }} - export DB_CONNECTION="pgsql" - export DB_HOST="{{ .Data.data.DB_HOST }}" - export DB_PORT="{{ .Data.data.DB_PORT }}" - export DB_DATABASE="{{ .Data.data.DB_DATABASE }}" - export DB_USERNAME="{{ .Data.data.DB_USERNAME }}" - export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)" - {{ end }} - {{ with secret "kv/data/atlas/finance/firefly-secrets" }} - export APP_KEY="$(cat /vault/secrets/firefly-app-key)" - {{ end }} - vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db" - vault.hashicorp.com/agent-inject-template-firefly-db-password: | - {{- with secret "kv/data/atlas/finance/firefly-db" -}} - {{ .Data.data.DB_PASSWORD }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets" - vault.hashicorp.com/agent-inject-template-firefly-app-key: | - {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} - {{ .Data.data.APP_KEY }} - {{- end -}} - spec: - serviceAccountName: finance-vault - restartPolicy: Never - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5"] - - weight: 70 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi4"] - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: sync - image: fireflyiii/core:version-6.4.15 - command: ["/bin/sh", "-c"] - args: - - | - set -eu - . /vault/secrets/firefly-env.sh - exec php /scripts/firefly_user_sync.php - env: - - name: APP_ENV - value: production - - name: APP_DEBUG - value: "false" - - name: TZ - value: Etc/UTC - volumeMounts: - - name: firefly-user-sync-script - mountPath: /scripts - readOnly: true - volumes: - - name: firefly-user-sync-script - configMap: - name: firefly-user-sync-script - defaultMode: 0555 diff --git a/services/finance/kustomization.yaml b/services/finance/kustomization.yaml index 1559f5c8..457cd1f3 100644 --- a/services/finance/kustomization.yaml +++ b/services/finance/kustomization.yaml @@ -12,8 +12,6 @@ resources: - oneoffs/finance-secrets-ensure-job.yaml - actual-budget-deployment.yaml - firefly-deployment.yaml - - firefly-user-sync-cronjob.yaml - - firefly-cronjob.yaml - actual-budget-service.yaml - firefly-service.yaml - actual-budget-ingress.yaml @@ -24,9 +22,6 @@ configMapGenerator: - name: actual-openid-bootstrap-script files: - actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs - - name: firefly-user-sync-script - files: - - firefly_user_sync.php=scripts/firefly_user_sync.php - name: finance-secrets-ensure-script files: - finance_secrets_ensure.py=scripts/finance_secrets_ensure.py diff --git a/services/finance/scripts/firefly_user_sync.php b/services/finance/scripts/firefly_user_sync.php deleted file mode 100644 index 4036c3d8..00000000 --- a/services/finance/scripts/firefly_user_sync.php +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env php -make(ConsoleKernel::class); -$kernel->bootstrap(); - -try { - FireflyConfig::set('single_user_mode', true); -} catch (Throwable $exc) { - error_line('failed to enforce single_user_mode: '.$exc->getMessage()); -} - -$repository = $app->make(UserRepositoryInterface::class); - -$existing_user = User::where('email', $email)->first(); -$first_user = User::count() == 0; - -if (!$existing_user) { - $existing_user = User::create( - [ - 'email' => $email, - 'password' => bcrypt($password), - 'blocked' => false, - 'blocked_code' => null, - ] - ); - - if ($first_user) { - $role = Role::where('name', 'owner')->first(); - if ($role) { - $existing_user->roles()->attach($role); - } - } - - log_line(sprintf('created firefly user %s', $email)); -} else { - log_line(sprintf('updating firefly user %s', $email)); -} - -$existing_user->blocked = false; -$existing_user->blocked_code = null; -$existing_user->save(); - -$repository->changePassword($existing_user, $password); -CreatesGroupMemberships::createGroupMembership($existing_user); - -log_line('firefly user sync complete'); diff --git a/services/health/kustomization.yaml b/services/health/kustomization.yaml index 4dccf8c0..9a346b9a 100644 --- a/services/health/kustomization.yaml +++ b/services/health/kustomization.yaml @@ -8,18 +8,8 @@ resources: - portal-rbac.yaml - wger-media-pvc.yaml - wger-static-pvc.yaml - - wger-admin-ensure-cronjob.yaml - - wger-user-sync-cronjob.yaml - wger-deployment.yaml - wger-service.yaml - wger-ingress.yaml generatorOptions: disableNameSuffixHash: true -configMapGenerator: - - name: wger-nginx-config - files: - - default.conf=config/nginx.conf - - nginx.conf=config/nginx-main.conf - - name: wger-user-sync-script - files: - - wger_user_sync.py=scripts/wger_user_sync.py diff --git a/services/health/scripts/wger_user_sync.py b/services/health/scripts/wger_user_sync.py deleted file mode 100644 index 4963c793..00000000 --- a/services/health/scripts/wger_user_sync.py +++ /dev/null @@ -1,120 +0,0 @@ -#!/usr/bin/env python3 - -from __future__ import annotations - -import os -import sys - -import django - - -def _env(name: str, default: str = "") -> str: - value = os.getenv(name, default) - return value.strip() if isinstance(value, str) else "" - - -def _setup_django() -> None: - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main") - django.setup() - - -def _set_default_gym(user) -> None: - try: - from wger.gym.models import GymConfig - except Exception: - return - - try: - config = GymConfig.objects.first() - except Exception: - return - - if not config or not getattr(config, "default_gym", None): - return - - profile = getattr(user, "userprofile", None) - if not profile or getattr(profile, "gym", None): - return - - profile.gym = config.default_gym - profile.save() - - -def _ensure_profile(user) -> None: - profile = getattr(user, "userprofile", None) - if not profile: - return - if hasattr(profile, "email_verified") and not profile.email_verified: - profile.email_verified = True - if hasattr(profile, "is_temporary") and profile.is_temporary: - profile.is_temporary = False - profile.save() - - -def _ensure_admin(username: str, password: str, email: str) -> None: - from django.contrib.auth.models import User - - if not username or not password: - raise RuntimeError("admin username/password missing") - - user, created = User.objects.get_or_create(username=username) - if created: - user.is_active = True - if not user.is_staff: - user.is_staff = True - if email: - user.email = email - user.set_password(password) - user.save() - - _ensure_profile(user) - _set_default_gym(user) - print(f"ensured admin user {username}") - - -def _ensure_user(username: str, password: str, email: str) -> None: - from django.contrib.auth.models import User - - if not username or not password: - raise RuntimeError("username/password missing") - - user, created = User.objects.get_or_create(username=username) - if created: - user.is_active = True - if email and user.email != email: - user.email = email - user.set_password(password) - user.save() - - _ensure_profile(user) - _set_default_gym(user) - action = "created" if created else "updated" - print(f"{action} user {username}") - - -def main() -> int: - admin_user = _env("WGER_ADMIN_USERNAME") - admin_password = _env("WGER_ADMIN_PASSWORD") - admin_email = _env("WGER_ADMIN_EMAIL") - - username = _env("WGER_USERNAME") or _env("ONLY_USERNAME") - password = _env("WGER_PASSWORD") - email = _env("WGER_EMAIL") - - if not any([admin_user and admin_password, username and password]): - print("no admin or user payload provided; exiting") - return 0 - - _setup_django() - - if admin_user and admin_password: - _ensure_admin(admin_user, admin_password, admin_email) - - if username and password: - _ensure_user(username, password, email) - - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/services/health/wger-admin-ensure-cronjob.yaml b/services/health/wger-admin-ensure-cronjob.yaml deleted file mode 100644 index a1063dd9..00000000 --- a/services/health/wger-admin-ensure-cronjob.yaml +++ /dev/null @@ -1,120 +0,0 @@ -# services/health/wger-admin-ensure-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: wger-admin-ensure - namespace: health - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "15 3 * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 1 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "health" - vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db" - vault.hashicorp.com/agent-inject-template-wger-env: | - {{ with secret "kv/data/atlas/health/wger-db" }} - export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}" - export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}" - export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}" - export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}" - export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)" - {{ end }} - {{ with secret "kv/data/atlas/health/wger-secrets" }} - export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)" - export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)" - {{ end }} - {{ with secret "kv/data/atlas/health/wger-admin" }} - export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)" - export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)" - {{ end }} - vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db" - vault.hashicorp.com/agent-inject-template-wger-db-password: | - {{- with secret "kv/data/atlas/health/wger-db" -}} - {{ .Data.data.DJANGO_DB_PASSWORD }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets" - vault.hashicorp.com/agent-inject-template-wger-secret-key: | - {{- with secret "kv/data/atlas/health/wger-secrets" -}} - {{ .Data.data.SECRET_KEY }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets" - vault.hashicorp.com/agent-inject-template-wger-signing-key: | - {{- with secret "kv/data/atlas/health/wger-secrets" -}} - {{ .Data.data.SIGNING_KEY }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin" - vault.hashicorp.com/agent-inject-template-wger-admin-username: | - {{- with secret "kv/data/atlas/health/wger-admin" -}} - {{ .Data.data.username }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin" - vault.hashicorp.com/agent-inject-template-wger-admin-password: | - {{- with secret "kv/data/atlas/health/wger-admin" -}} - {{ .Data.data.password }} - {{- end -}} - spec: - serviceAccountName: health-vault-sync - restartPolicy: Never - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5"] - - weight: 70 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi4"] - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: ensure - image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5 - imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c"] - args: - - | - set -eu - . /vault/secrets/wger-env - exec python3 /scripts/wger_user_sync.py - env: - - name: SITE_URL - value: https://health.bstein.dev - - name: TIME_ZONE - value: Etc/UTC - - name: TZ - value: Etc/UTC - - name: DJANGO_DEBUG - value: "False" - - name: DJANGO_DB_ENGINE - value: django.db.backends.postgresql - - name: DJANGO_CACHE_BACKEND - value: django.core.cache.backends.locmem.LocMemCache - - name: DJANGO_CACHE_LOCATION - value: wger-cache - volumeMounts: - - name: wger-user-sync-script - mountPath: /scripts - readOnly: true - volumes: - - name: wger-user-sync-script - configMap: - name: wger-user-sync-script - defaultMode: 0555 diff --git a/services/health/wger-user-sync-cronjob.yaml b/services/health/wger-user-sync-cronjob.yaml deleted file mode 100644 index de2dbb96..00000000 --- a/services/health/wger-user-sync-cronjob.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# services/health/wger-user-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: wger-user-sync - namespace: health - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "0 5 * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 0 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "health" - vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db" - vault.hashicorp.com/agent-inject-template-wger-env: | - {{ with secret "kv/data/atlas/health/wger-db" }} - export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}" - export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}" - export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}" - export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}" - export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)" - {{ end }} - {{ with secret "kv/data/atlas/health/wger-secrets" }} - export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)" - export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)" - {{ end }} - vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db" - vault.hashicorp.com/agent-inject-template-wger-db-password: | - {{- with secret "kv/data/atlas/health/wger-db" -}} - {{ .Data.data.DJANGO_DB_PASSWORD }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets" - vault.hashicorp.com/agent-inject-template-wger-secret-key: | - {{- with secret "kv/data/atlas/health/wger-secrets" -}} - {{ .Data.data.SECRET_KEY }} - {{- end -}} - vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets" - vault.hashicorp.com/agent-inject-template-wger-signing-key: | - {{- with secret "kv/data/atlas/health/wger-secrets" -}} - {{ .Data.data.SIGNING_KEY }} - {{- end -}} - spec: - serviceAccountName: health-vault-sync - restartPolicy: Never - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5"] - - weight: 70 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi4"] - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: sync - image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5 - imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c"] - args: - - | - set -eu - . /vault/secrets/wger-env - exec python3 /scripts/wger_user_sync.py - env: - - name: SITE_URL - value: https://health.bstein.dev - - name: TIME_ZONE - value: Etc/UTC - - name: TZ - value: Etc/UTC - - name: DJANGO_DEBUG - value: "False" - - name: DJANGO_DB_ENGINE - value: django.db.backends.postgresql - - name: DJANGO_CACHE_BACKEND - value: django.core.cache.backends.locmem.LocMemCache - - name: DJANGO_CACHE_LOCATION - value: wger-cache - volumeMounts: - - name: wger-user-sync-script - mountPath: /scripts - readOnly: true - volumes: - - name: wger-user-sync-script - configMap: - name: wger-user-sync-script - defaultMode: 0555 diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index 3e0494ee..dd151497 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -14,7 +14,6 @@ resources: - serverstransport.yaml - ingressroute.yaml - oneoffs/mailu-sync-job.yaml - - mailu-sync-cronjob.yaml - front-lb.yaml configMapGenerator: diff --git a/services/mailu/mailu-sync-cronjob.yaml b/services/mailu/mailu-sync-cronjob.yaml deleted file mode 100644 index bbe9909e..00000000 --- a/services/mailu/mailu-sync-cronjob.yaml +++ /dev/null @@ -1,93 +0,0 @@ -# services/mailu/mailu-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: mailu-sync-nightly - namespace: mailu-mailserver - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "30 4 * * *" - suspend: true - concurrencyPolicy: Forbid - jobTemplate: - spec: - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "mailu-mailserver" - vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret" - vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: | - {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret" - vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: | - {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret" - vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: | - {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials" - vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: | - {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials" - vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: | - {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}} - vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret" - vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: | - {{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}} - spec: - restartPolicy: OnFailure - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - serviceAccountName: mailu-vault-sync - containers: - - name: mailu-sync - image: python:3.11-alpine - imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - . /vault/scripts/mailu_vault_env.sh - pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \ - && python /app/sync.py - env: - - name: KEYCLOAK_BASE_URL - value: http://keycloak.sso.svc.cluster.local - - name: KEYCLOAK_REALM - value: atlas - - name: MAILU_DOMAIN - value: bstein.dev - - name: MAILU_DEFAULT_QUOTA - value: "20000000000" - - name: MAILU_SYSTEM_USERS - value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev" - - name: MAILU_DB_HOST - value: postgres-service.postgres.svc.cluster.local - - name: MAILU_DB_PORT - value: "5432" - volumeMounts: - - name: sync-script - mountPath: /app/sync.py - subPath: sync.py - - name: vault-scripts - mountPath: /vault/scripts - readOnly: true - resources: - requests: - cpu: 50m - memory: 128Mi - limits: - cpu: 200m - memory: 256Mi - volumes: - - name: sync-script - configMap: - name: mailu-sync-script - defaultMode: 0444 - - name: vault-scripts - configMap: - name: mailu-vault-env - defaultMode: 0555 diff --git a/services/maintenance/ariadne-deployment.yaml b/services/maintenance/ariadne-deployment.yaml index 3a01c3e5..390e5b36 100644 --- a/services/maintenance/ariadne-deployment.yaml +++ b/services/maintenance/ariadne-deployment.yaml @@ -308,9 +308,9 @@ spec: - name: ARIADNE_SCHEDULE_IMAGE_SWEEPER value: "0 */4 * * *" - name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH - value: "0 0 1 1 *" + value: "*/15 * * * *" - name: ARIADNE_SCHEDULE_VAULT_OIDC - value: "0 0 1 1 *" + value: "*/15 * * * *" - name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME value: "*/5 * * * *" - name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE diff --git a/services/monitoring/dashboards/atlas-jobs.json b/services/monitoring/dashboards/atlas-jobs.json index 799228c4..95ca2b41 100644 --- a/services/monitoring/dashboards/atlas-jobs.json +++ b/services/monitoring/dashboards/atlas-jobs.json @@ -235,7 +235,7 @@ { "id": 4, "type": "stat", - "title": "Glue Jobs Stale (>36h)", + "title": "Ariadne Schedules Stale (>36h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -248,7 +248,7 @@ }, "targets": [ { - "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)", + "expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) > bool 129600)) or on() vector(0)", "refId": "A" } ], @@ -303,7 +303,7 @@ { "id": 5, "type": "stat", - "title": "Glue Jobs Missing Success", + "title": "Ariadne Schedules Missing Success", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -316,7 +316,7 @@ }, "targets": [ { - "expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)", + "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) or on() vector(0)", "refId": "A" } ], @@ -363,7 +363,7 @@ { "id": 6, "type": "stat", - "title": "Glue Jobs Suspended", + "title": "Ariadne Schedules Failed Last Run", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -376,7 +376,7 @@ }, "targets": [ { - "expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)", + "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"}) > bool 0)) or on() vector(0)", "refId": "A" } ], @@ -616,7 +616,7 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -691,7 +691,7 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -753,7 +753,7 @@ { "id": 12, "type": "bargauge", - "title": "Glue Jobs Last Success (hours ago)", + "title": "Ariadne Fast Schedule Last Success (hours ago)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -766,9 +766,9 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", + "legendFormat": "{{task}}", "instant": true } ], @@ -828,7 +828,7 @@ { "id": 13, "type": "bargauge", - "title": "Glue Jobs Last Schedule (hours ago)", + "title": "Ariadne Fast Schedule Next Run (hours from now)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -841,9 +841,9 @@ }, "targets": [ { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", + "expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} - time()) / 3600))", "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", + "legendFormat": "{{task}}", "instant": true } ], @@ -1348,6 +1348,6 @@ "tags": [ "atlas", "jobs", - "glue" + "ariadne" ] } diff --git a/services/monitoring/grafana-dashboard-jobs.yaml b/services/monitoring/grafana-dashboard-jobs.yaml index 45a53747..4001aa95 100644 --- a/services/monitoring/grafana-dashboard-jobs.yaml +++ b/services/monitoring/grafana-dashboard-jobs.yaml @@ -244,7 +244,7 @@ data: { "id": 4, "type": "stat", - "title": "Glue Jobs Stale (>36h)", + "title": "Ariadne Schedules Stale (>36h)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -257,7 +257,7 @@ data: }, "targets": [ { - "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)", + "expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) > bool 129600)) or on() vector(0)", "refId": "A" } ], @@ -312,7 +312,7 @@ data: { "id": 5, "type": "stat", - "title": "Glue Jobs Missing Success", + "title": "Ariadne Schedules Missing Success", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -325,7 +325,7 @@ data: }, "targets": [ { - "expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)", + "expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) or on() vector(0)", "refId": "A" } ], @@ -372,7 +372,7 @@ data: { "id": 6, "type": "stat", - "title": "Glue Jobs Suspended", + "title": "Ariadne Schedules Failed Last Run", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -385,7 +385,7 @@ data: }, "targets": [ { - "expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)", + "expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"}) > bool 0)) or on() vector(0)", "refId": "A" } ], @@ -625,7 +625,7 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -700,7 +700,7 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", "legendFormat": "{{task}}", "instant": true @@ -762,7 +762,7 @@ data: { "id": 12, "type": "bargauge", - "title": "Glue Jobs Last Success (hours ago)", + "title": "Ariadne Fast Schedule Last Success (hours ago)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -775,9 +775,9 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", + "expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room)$\"}[$__range])) / 3600)", "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", + "legendFormat": "{{task}}", "instant": true } ], @@ -837,7 +837,7 @@ data: { "id": 13, "type": "bargauge", - "title": "Glue Jobs Last Schedule (hours ago)", + "title": "Ariadne Fast Schedule Next Run (hours from now)", "datasource": { "type": "prometheus", "uid": "atlas-vm" @@ -850,9 +850,9 @@ data: }, "targets": [ { - "expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)", + "expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} - time()) / 3600))", "refId": "A", - "legendFormat": "{{namespace}}/{{cronjob}}", + "legendFormat": "{{task}}", "instant": true } ], @@ -1357,6 +1357,6 @@ data: "tags": [ "atlas", "jobs", - "glue" + "ariadne" ] } diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml deleted file mode 100644 index 6913b603..00000000 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ /dev/null @@ -1,113 +0,0 @@ -# services/nextcloud-mail-sync/cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: nextcloud-mail-sync - namespace: nextcloud - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "0 5 * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 3 - failedJobsHistoryLimit: 1 - jobTemplate: - spec: - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "nextcloud" - vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db" - vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: | - {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} - export POSTGRES_DB="{{ .Data.data.database }}" - export POSTGRES_USER="{{ index .Data.data "db-username" }}" - export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}" - {{ end }} - {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} - export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" - export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" - {{ end }} - export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}" - export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}" - {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }} - export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}" - export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/postmark-relay" }} - export SMTP_NAME="{{ index .Data.data "apikey" }}" - export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/keycloak-admin" }} - export KC_ADMIN_USER="{{ .Data.data.username }}" - export KC_ADMIN_PASS="{{ .Data.data.password }}" - {{ end }} - spec: - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - restartPolicy: OnFailure - securityContext: - runAsUser: 0 - runAsGroup: 0 - serviceAccountName: nextcloud-vault - containers: - - name: mail-sync - image: nextcloud:29-apache - imagePullPolicy: IfNotPresent - command: - - /bin/sh - - -c - env: - - name: KC_BASE - value: http://keycloak.sso.svc.cluster.local - - name: KC_REALM - value: atlas - - name: MAILU_DOMAIN - value: bstein.dev - - name: POSTGRES_HOST - value: postgres-service.postgres.svc.cluster.local - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - volumeMounts: - - name: nextcloud-web - mountPath: /var/www/html - - name: nextcloud-config-pvc - mountPath: /var/www/html/config - - name: nextcloud-custom-apps - mountPath: /var/www/html/custom_apps - - name: nextcloud-user-data - mountPath: /var/www/html/data - - name: sync-script - mountPath: /sync/sync.sh - subPath: sync.sh - args: - - | - set -eu - . /vault/secrets/nextcloud-env.sh - exec /sync/sync.sh - volumes: - - name: nextcloud-config-pvc - persistentVolumeClaim: - claimName: nextcloud-config-v2 - - name: nextcloud-custom-apps - persistentVolumeClaim: - claimName: nextcloud-custom-apps-v2 - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data-v2 - - name: nextcloud-web - persistentVolumeClaim: - claimName: nextcloud-web-v2 - - name: sync-script - configMap: - name: nextcloud-mail-sync-script - defaultMode: 0755 diff --git a/services/nextcloud-mail-sync/kustomization.yaml b/services/nextcloud-mail-sync/kustomization.yaml index fb2a0775..78d8396c 100644 --- a/services/nextcloud-mail-sync/kustomization.yaml +++ b/services/nextcloud-mail-sync/kustomization.yaml @@ -3,11 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization namespace: nextcloud resources: - - cronjob.yaml - portal-rbac.yaml -configMapGenerator: - - name: nextcloud-mail-sync-script - files: - - sync.sh=scripts/nextcloud-mail-sync.sh - options: - disableNameSuffixHash: true diff --git a/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh b/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh deleted file mode 100755 index 732b9fbd..00000000 --- a/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh +++ /dev/null @@ -1,235 +0,0 @@ -#!/bin/bash -set -euo pipefail - -KC_BASE="${KC_BASE:?}" -KC_REALM="${KC_REALM:?}" -KC_ADMIN_USER="${KC_ADMIN_USER:?}" -KC_ADMIN_PASS="${KC_ADMIN_PASS:?}" -MAILU_DOMAIN="${MAILU_DOMAIN:?}" -ONLY_USERNAME="${ONLY_USERNAME:-}" -POSTGRES_HOST="${POSTGRES_HOST:-}" -POSTGRES_DB="${POSTGRES_DB:-}" -POSTGRES_USER="${POSTGRES_USER:-}" -POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}" - -if ! command -v jq >/dev/null 2>&1; then - apt-get update && apt-get install -y jq curl >/dev/null -fi - -ensure_psql() { - if command -v psql >/dev/null 2>&1; then - return 0 - fi - apt-get update && apt-get install -y postgresql-client >/dev/null -} - -set_editor_mode_richtext() { - local ids=("$@") - - if [[ ${#ids[@]} -eq 0 ]]; then - return 0 - fi - - if [[ -z "${POSTGRES_HOST}" || -z "${POSTGRES_DB}" || -z "${POSTGRES_USER}" || -z "${POSTGRES_PASSWORD}" ]]; then - echo "WARN: missing postgres env; cannot update mail editor_mode" >&2 - return 0 - fi - - ensure_psql - - local ids_csv - ids_csv=$(IFS=,; echo "${ids[*]}") - - PGPASSWORD="${POSTGRES_PASSWORD}" psql \ - -h "${POSTGRES_HOST}" \ - -U "${POSTGRES_USER}" \ - -d "${POSTGRES_DB}" \ - -v ON_ERROR_STOP=1 \ - -c "UPDATE oc_mail_accounts SET editor_mode='richtext' WHERE id IN (${ids_csv}) AND editor_mode <> 'richtext';" \ - >/dev/null -} - -list_mail_accounts() { - local user_id="${1}" - local export_out - - # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). - if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}"); then - echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2 - return 1 - fi - - awk -v OFS='\t' ' - BEGIN { IGNORECASE=1; id="" } - $1 == "Account" { id=$2; sub(":", "", id); next } - $1 == "-" && tolower($2) ~ /^e-?mail:$/ { if (id) print id, $3 } - ' <<<"${export_out}" | sort -u -} - -token=$( - curl -fsS \ - --data-urlencode "grant_type=password" \ - --data-urlencode "client_id=admin-cli" \ - --data-urlencode "username=${KC_ADMIN_USER}" \ - --data-urlencode "password=${KC_ADMIN_PASS}" \ - "${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token // empty' -) - -if [[ -z "${token}" || "${token}" == "null" ]]; then - echo "Failed to obtain admin token" - exit 1 -fi - -cd /var/www/html - -kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000&briefRepresentation=false" -if [[ -n "${ONLY_USERNAME}" ]]; then - username_q=$(jq -nr --arg v "${ONLY_USERNAME}" '$v|@uri') - kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1&briefRepresentation=false" -fi - -users=$(curl -fsS -H "Authorization: Bearer ${token}" "${kc_users_url}") -if ! jq -e 'type == "array"' >/dev/null 2>&1 <<<"${users}"; then - echo "ERROR: Keycloak user list is not an array; aborting sync" >&2 - exit 1 -fi - -kc_set_user_mail_meta() { - local user_id="${1}" - local primary_email="${2}" - local mailu_account_count="${3}" - local synced_at="${4}" - - # Fetch the full user representation so we don't accidentally clobber attributes. - local user_json updated_json - if ! user_json=$(curl -fsS -H "Authorization: Bearer ${token}" \ - "${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}"); then - echo "WARN: unable to fetch Keycloak user ${user_id} for metadata writeback" >&2 - return 1 - fi - - updated_json=$( - jq -c \ - --arg primary_email "${primary_email}" \ - --arg mailu_account_count "${mailu_account_count}" \ - --arg synced_at "${synced_at}" \ - ' - .attributes = (.attributes // {}) | - .attributes.nextcloud_mail_primary_email = [$primary_email] | - .attributes.nextcloud_mail_account_count = [$mailu_account_count] | - .attributes.nextcloud_mail_synced_at = [$synced_at] | - del(.access) - ' <<<"${user_json}" - ) - - curl -fsS -X PUT \ - -H "Authorization: Bearer ${token}" \ - -H "Content-Type: application/json" \ - -d "${updated_json}" \ - "${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}" >/dev/null -} - -while read -r user; do - user_id=$(jq -r '.id' <<<"${user}") - username=$(jq -r '.username' <<<"${user}") - keycloak_email=$(echo "${user}" | jq -r '.email // empty') - mailu_email=$(echo "${user}" | jq -r '(.attributes.mailu_email[0] // .attributes.mailu_email // empty)') - app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)') - - if [[ -z "${mailu_email}" ]]; then - if [[ -n "${keycloak_email}" && "${keycloak_email,,}" == *"@${MAILU_DOMAIN,,}" ]]; then - mailu_email="${keycloak_email}" - else - mailu_email="${username}@${MAILU_DOMAIN}" - fi - fi - - [[ -z "${mailu_email}" || -z "${app_pw}" ]] && continue - - if ! accounts=$(list_mail_accounts "${username}"); then - continue - fi - - # Manage only internal Mailu-domain accounts; leave any external accounts untouched. - mailu_accounts=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts}" || true) - - desired_email="${mailu_email}" - primary_id="" - primary_email="" - - if [[ -n "${mailu_accounts}" ]]; then - while IFS=$'\t' read -r account_id account_email; do - if [[ -z "${primary_id}" ]]; then - primary_id="${account_id}" - primary_email="${account_email}" - fi - if [[ "${account_email,,}" == "${desired_email,,}" ]]; then - primary_id="${account_id}" - primary_email="${account_email}" - break - fi - done <<<"${mailu_accounts}" - - echo "Updating ${username} mail account ${primary_id} (${primary_email})" - /usr/sbin/runuser -u www-data -- php occ mail:account:update -q "${primary_id}" \ - --name "${username}" \ - --email "${desired_email}" \ - --imap-host mail.bstein.dev \ - --imap-port 993 \ - --imap-ssl-mode ssl \ - --imap-user "${desired_email}" \ - --imap-password "${app_pw}" \ - --smtp-host mail.bstein.dev \ - --smtp-port 587 \ - --smtp-ssl-mode tls \ - --smtp-user "${desired_email}" \ - --smtp-password "${app_pw}" \ - --auth-method password >/dev/null 2>&1 || true - - # Remove any extra Mailu-domain accounts for this user to prevent duplicates. - while IFS=$'\t' read -r account_id account_email; do - if [[ "${account_id}" == "${primary_id}" ]]; then - continue - fi - echo "Deleting extra mail account ${account_id} (${account_email})" - /usr/sbin/runuser -u www-data -- php occ mail:account:delete -q "${account_id}" >/dev/null 2>&1 || true - done <<<"${mailu_accounts}" - else - echo "Creating mail account for ${username} (${desired_email})" - /usr/sbin/runuser -u www-data -- php occ mail:account:create -q \ - "${username}" "${username}" "${desired_email}" \ - mail.bstein.dev 993 ssl "${desired_email}" "${app_pw}" \ - mail.bstein.dev 587 tls "${desired_email}" "${app_pw}" password >/dev/null 2>&1 || true - fi - - # Write non-secret metadata back to Keycloak for UI introspection and onboarding gating. - synced_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ") - if accounts_after=$(list_mail_accounts "${username}"); then - mailu_accounts_after=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts_after}" || true) - if [[ -n "${mailu_accounts_after}" ]]; then - mailu_account_count=$(printf '%s\n' "${mailu_accounts_after}" | wc -l | tr -d ' ') - else - mailu_account_count="0" - fi - primary_email_after="" - editor_mode_ids=() - if [[ -n "${mailu_accounts_after}" ]]; then - while IFS=$'\t' read -r _account_id account_email; do - editor_mode_ids+=("${_account_id}") - if [[ "${account_email,,}" == "${desired_email,,}" ]]; then - primary_email_after="${account_email}" - break - fi - if [[ -z "${primary_email_after}" ]]; then - primary_email_after="${account_email}" - fi - done <<<"${mailu_accounts_after}" - fi - set_editor_mode_richtext "${editor_mode_ids[@]}" - else - mailu_account_count="0" - primary_email_after="" - fi - - kc_set_user_mail_meta "${user_id}" "${primary_email_after}" "${mailu_account_count}" "${synced_at}" || true -done < <(jq -c '.[]' <<<"${users}") diff --git a/services/nextcloud/cronjob.yaml b/services/nextcloud/cronjob.yaml deleted file mode 100644 index 58d8aa1b..00000000 --- a/services/nextcloud/cronjob.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# services/nextcloud/cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: nextcloud-cron - namespace: nextcloud -spec: - schedule: "*/5 * * * *" - suspend: true - concurrencyPolicy: Forbid - jobTemplate: - spec: - template: - spec: - securityContext: - runAsUser: 33 - runAsGroup: 33 - fsGroup: 33 - restartPolicy: OnFailure - containers: - - name: nextcloud-cron - image: nextcloud:29-apache - imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c"] - args: - - "cd /var/www/html && php -f cron.php" - volumeMounts: - - name: nextcloud-web - mountPath: /var/www/html - - name: nextcloud-config-pvc - mountPath: /var/www/html/config - - name: nextcloud-custom-apps - mountPath: /var/www/html/custom_apps - - name: nextcloud-user-data - mountPath: /var/www/html/data - volumes: - - name: nextcloud-config-pvc - persistentVolumeClaim: - claimName: nextcloud-config-v2 - - name: nextcloud-custom-apps - persistentVolumeClaim: - claimName: nextcloud-custom-apps-v2 - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data-v2 - - name: nextcloud-web - persistentVolumeClaim: - claimName: nextcloud-web-v2 diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index ebaeaaf2..a67e965e 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -9,13 +9,5 @@ resources: - pvc.yaml - deployment.yaml - collabora.yaml - - cronjob.yaml - - maintenance-cronjob.yaml - service.yaml - ingress.yaml -configMapGenerator: - - name: nextcloud-maintenance-script - files: - - maintenance.sh=scripts/nextcloud-maintenance.sh - options: - disableNameSuffixHash: true diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml deleted file mode 100644 index 177cc022..00000000 --- a/services/nextcloud/maintenance-cronjob.yaml +++ /dev/null @@ -1,98 +0,0 @@ -# services/nextcloud/maintenance-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: nextcloud-maintenance - namespace: nextcloud -spec: - schedule: "30 4 * * *" - suspend: true - concurrencyPolicy: Forbid - jobTemplate: - spec: - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "nextcloud" - vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db" - vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: | - {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} - export POSTGRES_DB="{{ .Data.data.database }}" - export POSTGRES_USER="{{ index .Data.data "db-username" }}" - export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}" - {{ end }} - {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} - export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" - export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" - {{ end }} - export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}" - export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}" - {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }} - export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}" - export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/postmark-relay" }} - export SMTP_NAME="{{ index .Data.data "apikey" }}" - export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" - {{ end }} - {{ with secret "kv/data/atlas/shared/keycloak-admin" }} - export KC_ADMIN_USER="{{ .Data.data.username }}" - export KC_ADMIN_PASS="{{ .Data.data.password }}" - {{ end }} - spec: - restartPolicy: OnFailure - securityContext: - runAsUser: 0 - runAsGroup: 0 - serviceAccountName: nextcloud-vault - containers: - - name: maintenance - image: nextcloud:29-apache - imagePullPolicy: IfNotPresent - command: ["/bin/sh", "-c"] - args: - - | - set -eu - . /vault/secrets/nextcloud-env.sh - exec /maintenance/maintenance.sh - env: - - name: NC_URL - value: https://cloud.bstein.dev - volumeMounts: - - name: nextcloud-web - mountPath: /var/www/html - - name: nextcloud-config-pvc - mountPath: /var/www/html/config - - name: nextcloud-custom-apps - mountPath: /var/www/html/custom_apps - - name: nextcloud-user-data - mountPath: /var/www/html/data - - name: maintenance-script - mountPath: /maintenance/maintenance.sh - subPath: maintenance.sh - resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - volumes: - - name: nextcloud-config-pvc - persistentVolumeClaim: - claimName: nextcloud-config-v2 - - name: nextcloud-custom-apps - persistentVolumeClaim: - claimName: nextcloud-custom-apps-v2 - - name: nextcloud-user-data - persistentVolumeClaim: - claimName: nextcloud-user-data-v2 - - name: nextcloud-web - persistentVolumeClaim: - claimName: nextcloud-web-v2 - - name: maintenance-script - configMap: - name: nextcloud-maintenance-script - defaultMode: 0755 diff --git a/services/nextcloud/scripts/nextcloud-maintenance.sh b/services/nextcloud/scripts/nextcloud-maintenance.sh deleted file mode 100755 index ab38616f..00000000 --- a/services/nextcloud/scripts/nextcloud-maintenance.sh +++ /dev/null @@ -1,108 +0,0 @@ -#!/bin/bash -set -euo pipefail - -NC_URL="${NC_URL:-https://cloud.bstein.dev}" -ADMIN_USER="${ADMIN_USER:?}" -ADMIN_PASS="${ADMIN_PASS:?}" - -export DEBIAN_FRONTEND=noninteractive -apt-get update -qq -apt-get install -y -qq curl jq >/dev/null - -run_occ() { - runuser -u www-data -- php /var/www/html/occ "$@" -} - -log() { echo "[$(date -Is)] $*"; } - -log "Ensuring Nextcloud app files are present" -if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then - rsync -a --delete \ - --exclude config \ - --exclude data \ - /usr/src/nextcloud/ /var/www/html/ -fi - -log "Ensuring Nextcloud permissions" -mkdir -p /var/www/html/data -chown 33:33 /var/www/html || true -chmod 775 /var/www/html || true -chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true - -log "Applying Atlas theming" -run_occ config:app:set theming name --value "Atlas Cloud" -run_occ config:app:set theming slogan --value "Unified access to Atlas services" -run_occ config:app:set theming url --value "https://cloud.bstein.dev" -run_occ config:app:set theming color --value "#0f172a" -run_occ config:app:set theming disable-user-theming --value "yes" - -log "Applying Atlas Mail styling defaults" -run_occ app:install customcss >/dev/null 2>&1 || true -run_occ app:enable customcss >/dev/null 2>&1 || true -MAIL_CSS=$(cat <<'CSS' -.mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table { - font-family: "Inter", "Source Sans 3", "Helvetica Neue", Arial, sans-serif; - font-size: 14px; - line-height: 1.6; - color: var(--color-main-text); -} -.mail-message-body pre { - background: rgba(15, 23, 42, 0.06); - padding: 12px; - border-radius: 8px; -} -.mail-message-body blockquote { - border-left: 3px solid var(--color-border); - padding-left: 12px; - margin: 8px 0; - color: var(--color-text-lighter); -} -.mail-message-body img { - max-width: 100%; - border-radius: 6px; -} -CSS -) -run_occ config:app:set customcss css --value "${MAIL_CSS}" >/dev/null - -log "Setting default quota to 250 GB" -run_occ config:app:set files default_quota --value "250 GB" - -API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1" -AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true") - -log "Removing existing external links" -existing=$(curl -sf "${AUTH[@]}" "${API_BASE}?format=json" | jq -r '.ocs.data[].id // empty') -for id in ${existing}; do - curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null || true -done - -SITES=( - "Vaultwarden|https://vault.bstein.dev" - "Jellyfin|https://stream.bstein.dev" - "Gitea|https://scm.bstein.dev" - "Jenkins|https://ci.bstein.dev" - "Harbor|https://registry.bstein.dev" - "Vault|https://secret.bstein.dev" - "Jitsi|https://meet.bstein.dev" - "Grafana|https://metrics.bstein.dev" - "Chat LLM|https://chat.ai.bstein.dev" - "Vision|https://draw.ai.bstein.dev" - "STT/TTS|https://talk.ai.bstein.dev" -) - -log "Seeding external links" -for entry in "${SITES[@]}"; do - IFS="|" read -r name url <<<"${entry}" - curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \ - -d "name=${name}" \ - -d "url=${url}" \ - -d "lang=" \ - -d "type=link" \ - -d "device=" \ - -d "icon=" \ - -d "groups[]=" \ - -d "redirect=1" >/dev/null -done - -log "Maintenance run completed" diff --git a/services/vault/k8s-auth-config-cronjob.yaml b/services/vault/k8s-auth-config-cronjob.yaml deleted file mode 100644 index 43da16b4..00000000 --- a/services/vault/k8s-auth-config-cronjob.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# services/vault/k8s-auth-config-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: vault-k8s-auth-config - namespace: vault - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/15 * * * *" - suspend: false - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 1 - template: - spec: - serviceAccountName: vault-admin - restartPolicy: Never - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: configure-k8s-auth - image: hashicorp/vault:1.17.6 - imagePullPolicy: IfNotPresent - command: - - sh - - /scripts/vault_k8s_auth_configure.sh - env: - - name: VAULT_ADDR - value: http://10.43.57.249:8200 - - name: VAULT_K8S_ROLE - value: vault-admin - - name: VAULT_K8S_TOKEN_REVIEWER_JWT_FILE - value: /var/run/secrets/vault-token-reviewer/token - - name: VAULT_K8S_ROLE_TTL - value: 1h - volumeMounts: - - name: k8s-auth-config-script - mountPath: /scripts - readOnly: true - - name: token-reviewer - mountPath: /var/run/secrets/vault-token-reviewer - readOnly: true - volumes: - - name: k8s-auth-config-script - configMap: - name: vault-k8s-auth-config-script - defaultMode: 0555 - - name: token-reviewer - secret: - secretName: vault-admin-token-reviewer diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index 060077b3..6a63be48 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -10,21 +10,9 @@ resources: - rbac.yaml - configmap.yaml - statefulset.yaml - - k8s-auth-config-cronjob.yaml - - oidc-config-cronjob.yaml - service.yaml - ingress.yaml - certificate.yaml - serverstransport.yaml generatorOptions: disableNameSuffixHash: true -configMapGenerator: - - name: vault-oidc-config-script - files: - - vault_oidc_configure.sh=scripts/vault_oidc_configure.sh - - name: vault-k8s-auth-config-script - files: - - vault_k8s_auth_configure.sh=scripts/vault_k8s_auth_configure.sh - - name: vault-entrypoint - files: - - vault-entrypoint.sh=scripts/vault-entrypoint.sh diff --git a/services/vault/oidc-config-cronjob.yaml b/services/vault/oidc-config-cronjob.yaml deleted file mode 100644 index 4d317c55..00000000 --- a/services/vault/oidc-config-cronjob.yaml +++ /dev/null @@ -1,83 +0,0 @@ -# services/vault/oidc-config-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: vault-oidc-config - namespace: vault - labels: - atlas.bstein.dev/glue: "true" -spec: - schedule: "*/15 * * * *" - suspend: true - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 1 - template: - metadata: - annotations: - vault.hashicorp.com/agent-inject: "true" - vault.hashicorp.com/agent-pre-populate-only: "true" - vault.hashicorp.com/role: "vault-admin" - vault.hashicorp.com/agent-inject-secret-vault-oidc-env.sh: "kv/data/atlas/vault/vault-oidc-config" - vault.hashicorp.com/agent-inject-template-vault-oidc-env.sh: | - {{ with secret "kv/data/atlas/vault/vault-oidc-config" }} - export VAULT_OIDC_DISCOVERY_URL="{{ .Data.data.discovery_url }}" - export VAULT_OIDC_CLIENT_ID="{{ .Data.data.client_id }}" - export VAULT_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}" - export VAULT_OIDC_DEFAULT_ROLE="{{ .Data.data.default_role }}" - export VAULT_OIDC_SCOPES="{{ .Data.data.scopes }}" - export VAULT_OIDC_USER_CLAIM="{{ .Data.data.user_claim }}" - export VAULT_OIDC_GROUPS_CLAIM="{{ .Data.data.groups_claim }}" - export VAULT_OIDC_TOKEN_POLICIES="{{ .Data.data.token_policies }}" - export VAULT_OIDC_ADMIN_GROUP="{{ .Data.data.admin_group }}" - export VAULT_OIDC_ADMIN_POLICIES="{{ .Data.data.admin_policies }}" - export VAULT_OIDC_DEV_GROUP="{{ .Data.data.dev_group }}" - export VAULT_OIDC_DEV_POLICIES="{{ .Data.data.dev_policies }}" - export VAULT_OIDC_USER_GROUP="{{ .Data.data.user_group }}" - export VAULT_OIDC_USER_POLICIES="{{ .Data.data.user_policies }}" - export VAULT_OIDC_REDIRECT_URIS="{{ .Data.data.redirect_uris }}" - export VAULT_OIDC_BOUND_AUDIENCES="{{ .Data.data.bound_audiences }}" - export VAULT_OIDC_BOUND_CLAIMS="{{ .Data.data.bound_claims }}" - export VAULT_OIDC_BOUND_CLAIMS_TYPE="{{ .Data.data.bound_claims_type }}" - {{ end }} - spec: - serviceAccountName: vault-admin - restartPolicy: Never - nodeSelector: - kubernetes.io/arch: arm64 - node-role.kubernetes.io/worker: "true" - containers: - - name: configure-oidc - image: hashicorp/vault:1.17.6 - imagePullPolicy: IfNotPresent - command: - - /entrypoint.sh - args: - - sh - - /scripts/vault_oidc_configure.sh - env: - - name: VAULT_ADDR - value: http://10.43.57.249:8200 - - name: VAULT_K8S_ROLE - value: vault-admin - - name: VAULT_ENV_FILE - value: /vault/secrets/vault-oidc-env.sh - volumeMounts: - - name: vault-entrypoint - mountPath: /entrypoint.sh - subPath: vault-entrypoint.sh - - name: oidc-config-script - mountPath: /scripts - readOnly: true - volumes: - - name: vault-entrypoint - configMap: - name: vault-entrypoint - defaultMode: 493 - - name: oidc-config-script - configMap: - name: vault-oidc-config-script - defaultMode: 0555 diff --git a/services/vault/scripts/vault-entrypoint.sh b/services/vault/scripts/vault-entrypoint.sh deleted file mode 100644 index fa3b791e..00000000 --- a/services/vault/scripts/vault-entrypoint.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh -set -eu - -if [ -n "${VAULT_ENV_FILE:-}" ]; then - if [ -f "${VAULT_ENV_FILE}" ]; then - # shellcheck disable=SC1090 - . "${VAULT_ENV_FILE}" - else - echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 - exit 1 - fi -fi - -if [ -n "${VAULT_COPY_FILES:-}" ]; then - old_ifs="$IFS" - IFS=',' - for pair in ${VAULT_COPY_FILES}; do - src="${pair%%:*}" - dest="${pair#*:}" - if [ -z "${src}" ] || [ -z "${dest}" ]; then - echo "Vault copy entry malformed: ${pair}" >&2 - exit 1 - fi - if [ ! -f "${src}" ]; then - echo "Vault file not found: ${src}" >&2 - exit 1 - fi - mkdir -p "$(dirname "${dest}")" - cp "${src}" "${dest}" - done - IFS="$old_ifs" -fi - -exec "$@" diff --git a/services/vault/scripts/vault_k8s_auth_configure.sh b/services/vault/scripts/vault_k8s_auth_configure.sh deleted file mode 100644 index 0f5b8d24..00000000 --- a/services/vault/scripts/vault_k8s_auth_configure.sh +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env sh -set -eu - -log() { echo "[vault-k8s-auth] $*"; } - -vault_cmd() { - for attempt in 1 2 3 4 5 6; do - set +e - output="$(vault "$@" 2>&1)" - status=$? - set -e - if [ "${status}" -eq 0 ]; then - printf '%s' "${output}" - return 0 - fi - log "vault command failed; retrying (${attempt}/6)" - sleep $((attempt * 2)) - done - log "vault command failed; giving up" - return 1 -} - -ensure_token() { - if [ -n "${VAULT_TOKEN:-}" ]; then - return - fi - role="${VAULT_K8S_ROLE:-vault}" - jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" - if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then - log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}" - exit 1 - fi - export VAULT_TOKEN -} - -if ! status_json="$(vault_cmd status -format=json)"; then - log "vault status failed; check VAULT_ADDR and VAULT_TOKEN" - exit 1 -fi - -if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then - log "vault not initialized; skipping" - exit 0 -fi - -if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then - log "vault sealed; skipping" - exit 0 -fi - -ensure_token - -k8s_host="https://${KUBERNETES_SERVICE_HOST}:443" -k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)" -k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" -role_ttl="${VAULT_K8S_ROLE_TTL:-1h}" -token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}" - -if [ -z "${token_reviewer_jwt}" ] && [ -n "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE:-}" ] && [ -r "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}" ]; then - token_reviewer_jwt="$(cat "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}")" -fi -if [ -z "${token_reviewer_jwt}" ]; then - token_reviewer_jwt="${k8s_token}" -fi - -if ! vault_cmd auth list -format=json | grep -q '"kubernetes/"'; then - log "enabling kubernetes auth" - vault_cmd auth enable kubernetes -fi - -log "configuring kubernetes auth" -vault_cmd write auth/kubernetes/config \ - token_reviewer_jwt="${token_reviewer_jwt}" \ - kubernetes_host="${k8s_host}" \ - kubernetes_ca_cert="${k8s_ca}" - -write_raw_policy() { - name="$1" - body="$2" - log "writing policy ${name}" - printf '%s\n' "${body}" | vault_cmd policy write "${name}" - -} - -write_policy_and_role() { - role="$1" - namespace="$2" - service_accounts="$3" - read_paths="$4" - write_paths="$5" - - policy_body="" - for path in ${read_paths}; do - policy_body="${policy_body} -path \"kv/data/atlas/${path}\" { - capabilities = [\"read\"] -} -path \"kv/metadata/atlas/${path}\" { - capabilities = [\"list\"] -} -" - done - for path in ${write_paths}; do - policy_body="${policy_body} -path \"kv/data/atlas/${path}\" { - capabilities = [\"create\", \"update\", \"read\"] -} -path \"kv/metadata/atlas/${path}\" { - capabilities = [\"list\"] -} -" - done - - log "writing policy ${role}" - printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" - - - log "writing role ${role}" - vault_cmd write "auth/kubernetes/role/${role}" \ - bound_service_account_names="${service_accounts}" \ - bound_service_account_namespaces="${namespace}" \ - policies="${role}" \ - ttl="${role_ttl}" -} - -vault_admin_policy=' -path "sys/auth" { - capabilities = ["read"] -} -path "sys/auth/*" { - capabilities = ["create", "update", "delete", "sudo", "read"] -} -path "auth/kubernetes/*" { - capabilities = ["create", "update", "read"] -} -path "auth/oidc/*" { - capabilities = ["create", "update", "read"] -} -path "sys/policies/acl" { - capabilities = ["list"] -} -path "sys/policies/acl/*" { - capabilities = ["create", "update", "read"] -} -path "sys/internal/ui/mounts" { - capabilities = ["read"] -} -path "sys/mounts" { - capabilities = ["read"] -} -path "sys/mounts/auth/*" { - capabilities = ["read", "update", "sudo"] -} -path "kv/data/atlas/vault/*" { - capabilities = ["read"] -} -path "kv/metadata/atlas/vault/*" { - capabilities = ["list"] -} -path "kv/data/*" { - capabilities = ["create", "update", "read", "delete", "patch"] -} -path "kv/metadata" { - capabilities = ["list"] -} -path "kv/metadata/*" { - capabilities = ["read", "list", "delete"] -} -path "kv/data/atlas/shared/*" { - capabilities = ["create", "update", "read", "patch"] -} -path "kv/metadata/atlas/shared/*" { - capabilities = ["list"] -} -' - -write_raw_policy "vault-admin" "${vault_admin_policy}" -dev_kv_policy=' -path "kv/metadata" { - capabilities = ["list"] -} -path "kv/metadata/atlas" { - capabilities = ["list"] -} -path "kv/metadata/atlas/shared" { - capabilities = ["list"] -} -path "kv/metadata/atlas/shared/*" { - capabilities = ["list"] -} -path "kv/data/atlas/shared/*" { - capabilities = ["read"] -} -' -write_raw_policy "dev-kv" "${dev_kv_policy}" -log "writing role vault-admin" -vault_cmd write "auth/kubernetes/role/vault-admin" \ - bound_service_account_names="vault-admin,ariadne" \ - bound_service_account_namespaces="vault,maintenance" \ - policies="vault-admin" \ - ttl="${role_ttl}" - -write_policy_and_role "outline" "outline" "outline-vault" \ - "outline/* shared/postmark-relay" "" -write_policy_and_role "planka" "planka" "planka-vault" \ - "planka/* shared/postmark-relay" "" -write_policy_and_role "bstein-dev-home" "bstein-dev-home" "bstein-dev-home,bstein-dev-home-vault-sync" \ - "portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay mailu/mailu-initial-account-secret shared/harbor-pull" "" -write_policy_and_role "gitea" "gitea" "gitea-vault" \ - "gitea/*" "" -write_policy_and_role "vaultwarden" "vaultwarden" "vaultwarden-vault" \ - "vaultwarden/* mailu/mailu-initial-account-secret" "" -write_policy_and_role "sso" "sso" "sso-vault,sso-vault-sync,mas-secrets-ensure" \ - "sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin shared/portal-e2e-client shared/postmark-relay shared/harbor-pull" "" -write_policy_and_role "mailu-mailserver" "mailu-mailserver" "mailu-vault-sync" \ - "mailu/* shared/postmark-relay shared/harbor-pull" "" -write_policy_and_role "harbor" "harbor" "harbor-vault-sync" \ - "harbor/* shared/harbor-pull" "" -write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \ - "nextcloud/* shared/keycloak-admin shared/postmark-relay" "" -write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \ - "comms/* shared/chat-ai-keys-runtime shared/harbor-pull" "" -write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \ - "jenkins/* shared/harbor-pull" "" -write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \ - "monitoring/* shared/postmark-relay shared/harbor-pull" "" -write_policy_and_role "logging" "logging" "logging-vault-sync" \ - "logging/* shared/harbor-pull" "" -write_policy_and_role "pegasus" "jellyfin" "pegasus-vault-sync" \ - "pegasus/* shared/harbor-pull" "" -write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \ - "crypto/* shared/harbor-pull" "" -write_policy_and_role "health" "health" "health-vault-sync" \ - "health/*" "" -write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \ - "maintenance/ariadne-db maintenance/metis-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" "" -write_policy_and_role "maintenance-metis-token-sync" "maintenance" "metis-token-sync" \ - "" \ - "maintenance/metis-runtime" -write_policy_and_role "finance" "finance" "finance-vault" \ - "finance/* shared/postmark-relay" "" -write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \ - "" \ - "finance/*" -write_policy_and_role "longhorn" "longhorn-system" "longhorn-vault,longhorn-vault-sync" \ - "longhorn/* shared/harbor-pull" "" -write_policy_and_role "postgres" "postgres" "postgres-vault" \ - "postgres/postgres-db" "" -write_policy_and_role "vault" "vault" "vault" \ - "vault/*" "" - -write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \ - "shared/keycloak-admin maintenance/metis-ssh-keys" \ - "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/metis-ssh-keys" -write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \ - "" \ - "crypto/wallet-monero-temp-rpc-auth" -write_policy_and_role "comms-secrets" "comms" \ - "comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job" \ - "" \ - "comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin comms/synapse-registration comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey" diff --git a/services/vault/scripts/vault_oidc_configure.sh b/services/vault/scripts/vault_oidc_configure.sh deleted file mode 100644 index 70da3b7d..00000000 --- a/services/vault/scripts/vault_oidc_configure.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env sh -set -eu - -log() { echo "[vault-oidc] $*"; } - -vault_cmd() { - for attempt in 1 2 3 4 5 6; do - set +e - output="$(vault "$@" 2>&1)" - status=$? - set -e - if [ "${status}" -eq 0 ]; then - printf '%s' "${output}" - return 0 - fi - log "vault command failed; retrying (${attempt}/6)" - sleep $((attempt * 2)) - done - log "vault command failed; giving up" - return 1 -} - -ensure_token() { - if [ -n "${VAULT_TOKEN:-}" ]; then - return - fi - role="${VAULT_K8S_ROLE:-vault}" - jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" - if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then - log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}" - exit 1 - fi - export VAULT_TOKEN -} - -if ! status_json="$(vault_cmd status -format=json)"; then - log "vault status failed; check VAULT_ADDR and VAULT_TOKEN" - exit 1 -fi - -if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then - log "vault not initialized; skipping" - exit 0 -fi - -if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then - log "vault sealed; skipping" - exit 0 -fi - -ensure_token - -: "${VAULT_OIDC_DISCOVERY_URL:?set VAULT_OIDC_DISCOVERY_URL}" -: "${VAULT_OIDC_CLIENT_ID:?set VAULT_OIDC_CLIENT_ID}" -: "${VAULT_OIDC_CLIENT_SECRET:?set VAULT_OIDC_CLIENT_SECRET}" - -default_role="${VAULT_OIDC_DEFAULT_ROLE:-admin}" -scopes="${VAULT_OIDC_SCOPES:-openid profile email groups}" -user_claim="${VAULT_OIDC_USER_CLAIM:-preferred_username}" -groups_claim="${VAULT_OIDC_GROUPS_CLAIM:-groups}" -redirect_uris="${VAULT_OIDC_REDIRECT_URIS:-https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback}" -bound_audiences="${VAULT_OIDC_BOUND_AUDIENCES:-${VAULT_OIDC_CLIENT_ID}}" -bound_claims_type="${VAULT_OIDC_BOUND_CLAIMS_TYPE:-string}" -bound_claims_type="$(printf '%s' "${bound_claims_type}" | tr -d '[:space:]')" -if [ -z "${bound_claims_type}" ] || [ "${bound_claims_type}" = "" ]; then - bound_claims_type="string" -fi - -admin_group="${VAULT_OIDC_ADMIN_GROUP:-admin}" -admin_policies="${VAULT_OIDC_ADMIN_POLICIES:-default,vault-admin}" -dev_group="${VAULT_OIDC_DEV_GROUP:-dev}" -dev_policies="${VAULT_OIDC_DEV_POLICIES:-default,dev-kv}" -user_group="${VAULT_OIDC_USER_GROUP:-${dev_group}}" -user_policies="${VAULT_OIDC_USER_POLICIES:-${VAULT_OIDC_TOKEN_POLICIES:-${dev_policies}}}" - -if ! vault_cmd auth list -format=json | grep -q '"oidc/"'; then - log "enabling oidc auth method" - vault_cmd auth enable oidc -fi - -log "configuring oidc auth" -vault_cmd write auth/oidc/config \ - oidc_discovery_url="${VAULT_OIDC_DISCOVERY_URL}" \ - oidc_client_id="${VAULT_OIDC_CLIENT_ID}" \ - oidc_client_secret="${VAULT_OIDC_CLIENT_SECRET}" \ - default_role="${default_role}" - -vault_cmd auth tune -listing-visibility=unauth oidc >/dev/null - -build_bound_claims() { - claim="$1" - groups="$2" - json="{\"${claim}\":[" - first=1 - old_ifs=$IFS - IFS=, - for item in $groups; do - item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" - if [ -z "${item}" ]; then - continue - fi - if [ "${first}" -eq 0 ]; then - json="${json}," - fi - json="${json}\"${item}\"" - first=0 - done - IFS=$old_ifs - json="${json}]}" - printf '%s' "${json}" -} - -build_json_array() { - items="$1" - json="[" - first=1 - old_ifs=$IFS - IFS=, - for item in $items; do - item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" - if [ -z "${item}" ]; then - continue - fi - if [ "${first}" -eq 0 ]; then - json="${json}," - fi - json="${json}\"${item}\"" - first=0 - done - IFS=$old_ifs - json="${json}]" - printf '%s' "${json}" -} - -configure_role() { - role_name="$1" - role_groups="$2" - role_policies="$3" - if [ -z "${role_name}" ] || [ -z "${role_groups}" ] || [ -z "${role_policies}" ]; then - log "skipping role ${role_name} (missing groups or policies)" - return - fi - claims="$(build_bound_claims "${groups_claim}" "${role_groups}")" - scopes_csv="$(printf '%s' "${scopes}" | tr ' ' ',' | tr -s ',' | sed 's/^,//;s/,$//')" - redirect_json="$(build_json_array "${redirect_uris}")" - payload_file="$(mktemp)" - cat > "${payload_file}" <