ariadne: migrate glue cronjobs to schedules

Brad Stein 2026-04-10 21:22:35 -03:00
parent 60446ee830
commit 166020ca1d
39 changed files with 283 additions and 3572 deletions

View File

@@ -1,18 +1,38 @@
max_success_age_hours: 48
allow_suspended:
- bstein-dev-home/vaultwarden-cred-sync
- comms/guest-name-randomizer
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
- finance/firefly-user-sync
- health/wger-admin-ensure
- health/wger-user-sync
- mailu-mailserver/mailu-sync-nightly
- nextcloud/nextcloud-mail-sync
- vault/vault-oidc-config
ariadne_schedule_tasks:
- schedule.mailu_sync
- schedule.nextcloud_sync
- schedule.vaultwarden_sync
- schedule.wger_admin
- task: schedule.mailu_sync
check_last_success: false
- task: schedule.nextcloud_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.nextcloud_cron
check_last_success: true
max_success_age_hours: 48
- task: schedule.nextcloud_maintenance
check_last_success: false
- task: schedule.vaultwarden_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.wger_user_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.wger_admin
check_last_success: false
- task: schedule.firefly_user_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.firefly_cron
check_last_success: false
- task: schedule.vault_k8s_auth
check_last_success: false
- task: schedule.vault_oidc
check_last_success: false
- task: schedule.comms_guest_name
check_last_success: true
max_success_age_hours: 48
- task: schedule.comms_pin_invite
check_last_success: false
- task: schedule.comms_reset_room
check_last_success: false
- task: schedule.comms_seed_room
check_last_success: true
max_success_age_hours: 48
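# Semantics, as exercised by the platform tests below: every listed task must
# expose ariadne_schedule_next_run_timestamp_seconds; tasks with
# check_last_success: true must additionally report a last-success sample no
# older than their max_success_age_hours, plus a 0/1 last-status sample.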

View File

@@ -0,0 +1,88 @@
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
import requests
import yaml
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
return yaml.safe_load(handle) or {}
def _query(promql: str) -> list[dict]:
vm_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
response = requests.get(f"{vm_url}/api/v1/query", params={"query": promql}, timeout=10)
response.raise_for_status()
payload = response.json()
return payload.get("data", {}).get("result", [])
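# The helper above unwraps the Prometheus-compatible instant-query envelope
# that VictoriaMetrics serves; a matching sample looks roughly like:
#   {"metric": {"__name__": "ariadne_schedule_next_run_timestamp_seconds",
#               "task": "schedule.mailu_sync"},
#    "value": [1760000000.0, "1760003600"]}
# i.e. "value" is a [query-timestamp, sample-as-string] pair.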
def _expected_tasks() -> list[dict]:
cfg = _load_config()
tasks = cfg.get("ariadne_schedule_tasks", [])
assert tasks, "No Ariadne schedule tasks configured"
return tasks
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_series_exist():
tasks = _expected_tasks()
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
assert not missing, f"Missing next-run metrics for: {', '.join(missing)}"
def test_ariadne_schedule_recent_success():
tasks = _tracked_tasks(_expected_tasks())
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"
now = datetime.now(timezone.utc)
age_by_task = {
item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600
for item in series
}
too_old = [
f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)"
for item in tasks
if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"])
]
assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)
def test_ariadne_schedule_last_status_present_and_boolean():
tasks = _tracked_tasks(_expected_tasks())
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
assert not missing, f"Missing last-status metrics for: {', '.join(missing)}"
invalid = []
for item in series:
task = item.get("metric", {}).get("task")
value = float(item["value"][1])
if value not in (0.0, 1.0):
invalid.append(f"{task}={value}")
assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"
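# Example invocation (test file name assumed; VM_URL falls back to the
# in-cluster default above):
#   VM_URL=http://victoria-metrics-single-server:8428 pytest -q test_ariadne_schedules.py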

View File

@@ -1,46 +0,0 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import yaml
from kubernetes import client, config
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
with CONFIG_PATH.open("r", encoding="utf-8") as handle:
return yaml.safe_load(handle) or {}
def _load_kube():
try:
config.load_incluster_config()
except config.ConfigException:
config.load_kube_config()
def test_glue_cronjobs_recent_success():
cfg = _load_config()
max_age_hours = int(cfg.get("max_success_age_hours", 48))
allow_suspended = set(cfg.get("allow_suspended", []))
_load_kube()
batch = client.BatchV1Api()
cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items
assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"
now = datetime.now(timezone.utc)
for cronjob in cronjobs:
name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}"
if cronjob.spec.suspend:
assert name in allow_suspended, f"{name} is suspended but not in allow_suspended"
continue
last_success = cronjob.status.last_successful_time
assert last_success is not None, f"{name} has no lastSuccessfulTime"
age_hours = (now - last_success).total_seconds() / 3600
assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago"

View File

@@ -23,26 +23,45 @@ def _query(promql: str) -> list[dict]:
return payload.get("data", {}).get("result", [])
def test_glue_metrics_present():
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
assert series, "No glue cronjob label series found"
def _expected_tasks() -> list[dict]:
cfg = _load_config()
tasks = cfg.get("ariadne_schedule_tasks", [])
assert tasks, "No Ariadne schedule tasks configured"
return tasks
def test_glue_metrics_success_join():
query = (
"kube_cronjob_status_last_successful_time "
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
)
series = _query(query)
assert series, "No glue cronjob last success series found"
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_metrics_present():
cfg = _load_config()
expected = cfg.get("ariadne_schedule_tasks", [])
if not expected:
return
series = _query("ariadne_schedule_next_run_timestamp_seconds")
tasks = {item.get("metric", {}).get("task") for item in series}
missing = [task for task in expected if task not in tasks]
tasks = _expected_tasks()
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
def test_ariadne_schedule_success_and_status_metrics_present():
tasks = _tracked_tasks(_expected_tasks())
selector = _task_regex(tasks)
success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
success_tasks = {item.get("metric", {}).get("task") for item in success}
status_tasks = {item.get("metric", {}).get("task") for item in status}
expected = {item["task"] for item in tasks}
missing_success = sorted(expected - success_tasks)
missing_status = sorted(expected - status_tasks)
assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"

View File

@@ -377,25 +377,75 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}"
GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})"
GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})"
GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1"
GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})"
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"
GLUE_STALE_WINDOW_SEC = 36 * 3600
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE})) or on() vector(0)"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE}) or on() vector(0)"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED}) or on() vector(0)"
def promql_task_regex(tasks):
    """Join task names into a regex alternation for PromQL label matchers.

    Names are joined verbatim; the literal dots in task names are left
    unescaped, which is harmless here since `.` also matches a literal dot.
    """
    return "|".join(tasks)
ARIADNE_ALL_SCHEDULE_TASKS = [
"schedule.mailu_sync",
"schedule.nextcloud_sync",
"schedule.nextcloud_cron",
"schedule.nextcloud_maintenance",
"schedule.vaultwarden_sync",
"schedule.wger_user_sync",
"schedule.wger_admin",
"schedule.firefly_user_sync",
"schedule.firefly_cron",
"schedule.vault_k8s_auth",
"schedule.vault_oidc",
"schedule.comms_guest_name",
"schedule.comms_pin_invite",
"schedule.comms_reset_room",
"schedule.comms_seed_room",
]
ARIADNE_FAST_SCHEDULE_TASKS = [
task
for task in ARIADNE_ALL_SCHEDULE_TASKS
if task not in {"schedule.comms_pin_invite", "schedule.comms_reset_room"}
]
ARIADNE_SCHEDULE_HEALTH_TASKS = [
"schedule.nextcloud_sync",
"schedule.nextcloud_cron",
"schedule.vaultwarden_sync",
"schedule.wger_user_sync",
"schedule.firefly_user_sync",
"schedule.comms_guest_name",
"schedule.comms_seed_room",
]
ARIADNE_ALL_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_ALL_SCHEDULE_TASKS)})$"'
ARIADNE_FAST_SCHEDULE_FILTER = f'task=~"^({promql_task_regex(ARIADNE_FAST_SCHEDULE_TASKS)})$"'
ARIADNE_SCHEDULE_HEALTH_FILTER = f'task=~"^({promql_task_regex(ARIADNE_SCHEDULE_HEALTH_TASKS)})$"'
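# For reference, ARIADNE_SCHEDULE_HEALTH_FILTER expands to:
#   task=~"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$"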
ARIADNE_ALL_SCHEDULE_NEXT_RUN = f"ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_ALL_SCHEDULE_LAST_SUCCESS = (
f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
)
ARIADNE_ALL_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_ALL_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_ALL_SCHEDULE_FILTER}}}"
ARIADNE_FAST_SCHEDULE_LAST_SUCCESS = (
f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
)
ARIADNE_FAST_SCHEDULE_LAST_ERROR = f"ariadne_schedule_last_error_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
ARIADNE_FAST_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_FAST_SCHEDULE_FILTER}}}"
ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS = (
f"ariadne_schedule_last_success_timestamp_seconds{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}"
)
ARIADNE_HEALTH_SCHEDULE_LAST_STATUS = f"ariadne_schedule_last_status{{{ARIADNE_SCHEDULE_HEALTH_FILTER}}}"
ARIADNE_SCHEDULE_LAST_SUCCESS_AGE = f"(time() - {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})"
ARIADNE_SCHEDULE_LAST_ERROR_AGE = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR})"
ARIADNE_SCHEDULE_LAST_SUCCESS_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_AGE_HOURS = f"({ARIADNE_SCHEDULE_LAST_ERROR_AGE}) / 3600"
ARIADNE_SCHEDULE_STALE_WINDOW_SEC = 36 * 3600
ARIADNE_SCHEDULE_STALE = f"(({ARIADNE_SCHEDULE_LAST_SUCCESS_AGE}) > bool {ARIADNE_SCHEDULE_STALE_WINDOW_SEC})"
ARIADNE_SCHEDULE_MISSING = (
f"({ARIADNE_ALL_SCHEDULE_NEXT_RUN} unless on(task) {ARIADNE_HEALTH_SCHEDULE_LAST_SUCCESS})"
)
ARIADNE_SCHEDULE_FAILED = f"((1 - {ARIADNE_HEALTH_SCHEDULE_LAST_STATUS}) > bool 0)"
ARIADNE_SCHEDULE_STALE_COUNT = f"sum({ARIADNE_SCHEDULE_STALE}) or on() vector(0)"
ARIADNE_SCHEDULE_MISSING_COUNT = f"count({ARIADNE_SCHEDULE_MISSING}) or on() vector(0)"
ARIADNE_SCHEDULE_FAILED_COUNT = f"sum({ARIADNE_SCHEDULE_FAILED}) or on() vector(0)"
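# Worked expansion of the stale count (36 h window = 129600 s), with the health
# filter abbreviated:
#   sum(((time() - ariadne_schedule_last_success_timestamp_seconds{task=~"^(...)$"})
#        > bool 129600)) or on() vector(0)
# `> bool` maps each per-task age comparison to 0/1, so the sum counts stale
# schedules; `or on() vector(0)` keeps the stat panel at zero when no series match.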
ARIADNE_TASK_ERRORS_RANGE = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[$__range]))'
ARIADNE_TASK_ERRORS_24H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[24h]))'
ARIADNE_TASK_ERRORS_1H = 'sum by (task) (increase(ariadne_task_runs_total{status="error"}[1h]))'
@@ -410,14 +460,18 @@ ARIADNE_TASK_FAILURES_SERIES = 'sum(increase(ariadne_task_runs_total{status="err
ARIADNE_TASK_WARNINGS_SERIES = (
'sum(increase(ariadne_task_runs_total{status!~"ok|error"}[$__interval])) or on() vector(0)'
)
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = "(time() - ariadne_schedule_last_success_timestamp_seconds) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = "(time() - ariadne_schedule_last_error_timestamp_seconds) / 3600"
ARIADNE_SCHEDULE_LAST_SUCCESS_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}) / 3600"
ARIADNE_SCHEDULE_LAST_ERROR_HOURS = f"(time() - {ARIADNE_ALL_SCHEDULE_LAST_ERROR}) / 3600"
ARIADNE_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
"(time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600"
f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600"
)
ARIADNE_SCHEDULE_LAST_ERROR_RANGE_HOURS = (
"(time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600"
f"(time() - max_over_time({ARIADNE_ALL_SCHEDULE_LAST_ERROR}[$__range])) / 3600"
)
ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS = (
f"(time() - max_over_time({ARIADNE_FAST_SCHEDULE_LAST_SUCCESS}[$__range])) / 3600"
)
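# $__range and $__interval in the expressions above are Grafana template
# variables, substituted per panel at render time; these strings are only
# valid inside dashboard targets, not as ad-hoc PromQL.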
ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS = (
    f"((ariadne_schedule_next_run_timestamp_seconds{{{ARIADNE_FAST_SCHEDULE_FILTER}}} - time()) / 3600)"
)
ARIADNE_ACCESS_REQUESTS = "ariadne_access_requests_total"
PLATFORM_TEST_SUITE_NAMES = [
"ariadne",
@@ -593,8 +647,6 @@ ONEOFF_JOB_POD_AGE_HOURS = (
'* on(namespace,pod) group_left(phase) '
'max by (namespace,pod,phase) (kube_pod_status_phase{phase=~"Running|Succeeded"})'
)
GLUE_LAST_SUCCESS_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SUCCESS}[$__range])) / 3600"
GLUE_LAST_SCHEDULE_RANGE_HOURS = f"(time() - max_over_time({GLUE_LAST_SCHEDULE}[$__range])) / 3600"
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES)
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
@@ -2838,8 +2890,8 @@ def build_jobs_dashboard():
panels.append(
stat_panel(
4,
"Glue Jobs Stale (>36h)",
GLUE_STALE_COUNT,
"Ariadne Schedules Stale (>36h)",
ARIADNE_SCHEDULE_STALE_COUNT,
{"h": 4, "w": 4, "x": 0, "y": 7},
unit="none",
thresholds={
@@ -2856,8 +2908,8 @@ def build_jobs_dashboard():
panels.append(
stat_panel(
5,
"Glue Jobs Missing Success",
GLUE_MISSING_COUNT,
"Ariadne Schedules Missing Success",
ARIADNE_SCHEDULE_MISSING_COUNT,
{"h": 4, "w": 4, "x": 4, "y": 7},
unit="none",
)
@@ -2865,8 +2917,8 @@ def build_jobs_dashboard():
panels.append(
stat_panel(
6,
"Glue Jobs Suspended",
GLUE_SUSPENDED_COUNT,
"Ariadne Schedules Failed Last Run",
ARIADNE_SCHEDULE_FAILED_COUNT,
{"h": 4, "w": 4, "x": 8, "y": 7},
unit="none",
)
@@ -2927,12 +2979,12 @@ def build_jobs_dashboard():
panels.append(
bargauge_panel(
12,
"Glue Jobs Last Success (hours ago)",
GLUE_LAST_SUCCESS_RANGE_HOURS,
"Ariadne Fast Schedule Last Success (hours ago)",
ARIADNE_FAST_SCHEDULE_LAST_SUCCESS_RANGE_HOURS,
{"h": 6, "w": 12, "x": 0, "y": 23},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
legend="{{task}}",
thresholds=age_thresholds,
decimals=2,
)
@@ -2940,12 +2992,12 @@ def build_jobs_dashboard():
panels.append(
bargauge_panel(
13,
"Glue Jobs Last Schedule (hours ago)",
GLUE_LAST_SCHEDULE_RANGE_HOURS,
"Ariadne Fast Schedule Next Run (hours from now)",
ARIADNE_FAST_SCHEDULE_NEXT_RUN_HOURS,
{"h": 6, "w": 12, "x": 12, "y": 23},
unit="h",
instant=True,
legend="{{namespace}}/{{cronjob}}",
legend="{{task}}",
thresholds=age_thresholds,
decimals=2,
)
@@ -3045,7 +3097,7 @@ def build_jobs_dashboard():
"annotations": {"list": []},
"schemaVersion": 39,
"style": "dark",
"tags": ["atlas", "jobs", "glue"],
"tags": ["atlas", "jobs", "ariadne"],
}

View File

@@ -15,7 +15,6 @@ resources:
- frontend-service.yaml
- backend-deployment.yaml
- backend-service.yaml
- vaultwarden-cred-sync-cronjob.yaml
- oneoffs/portal-onboarding-e2e-test-job.yaml
- ingress.yaml
images:
@@ -30,12 +29,6 @@ configMapGenerator:
- gateway.py=scripts/gateway.py
options:
disableNameSuffixHash: true
- name: vaultwarden-cred-sync-script
namespace: bstein-dev-home
files:
- vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
options:
disableNameSuffixHash: true
- name: portal-onboarding-e2e-tests
namespace: bstein-dev-home
files:

View File

@@ -1,245 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import sys
import time
from datetime import datetime, timezone
from typing import Any, Iterable
import httpx
from atlas_portal import settings
from atlas_portal.keycloak import admin_client
from atlas_portal.vaultwarden import invite_user
VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800"))
VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2"))
def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
client = admin_client()
if not client.ready():
raise RuntimeError("keycloak admin client not configured")
url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
first = 0
while True:
headers = _headers_with_retry(client)
# We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
# brief representation which may omit these.
params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"}
payload = None
for attempt in range(1, 6):
try:
with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
resp = http.get(url, params=params, headers=headers)
resp.raise_for_status()
payload = resp.json()
break
except httpx.HTTPError as exc:
if attempt == 5:
raise
time.sleep(attempt * 2)
if not isinstance(payload, list) or not payload:
return
for item in payload:
if isinstance(item, dict):
yield item
if len(payload) < page_size:
return
first += page_size
def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]:
last_exc: Exception | None = None
for attempt in range(1, attempts + 1):
try:
return client.headers()
except Exception as exc:
last_exc = exc
time.sleep(attempt * 2)
if last_exc:
raise last_exc
raise RuntimeError("failed to fetch keycloak headers")
def _extract_attr(attrs: Any, key: str) -> str:
if not isinstance(attrs, dict):
return ""
raw = attrs.get(key)
if isinstance(raw, list):
for item in raw:
if isinstance(item, str) and item.strip():
return item.strip()
return ""
if isinstance(raw, str) and raw.strip():
return raw.strip()
return ""
def _parse_synced_at(value: str) -> float | None:
value = (value or "").strip()
if not value:
return None
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
try:
parsed = datetime.strptime(value, fmt)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.timestamp()
except ValueError:
continue
return None
def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
username = username.strip()
if not username:
return ""
attrs = user.get("attributes")
vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR)
if vaultwarden_email:
return vaultwarden_email
mailu_email = _extract_attr(attrs, "mailu_email")
if mailu_email:
return mailu_email
email = (user.get("email") if isinstance(user.get("email"), str) else "") or ""
email = email.strip()
if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
return email
# Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
# This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
return ""
def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
value = (value or "").strip()
if not value:
return
existing = _extract_attr(user.get("attributes"), key)
if existing:
return
admin_client().set_user_attribute(username, key, value)
def _set_user_attribute(username: str, key: str, value: str) -> None:
value = (value or "").strip()
if not value:
return
admin_client().set_user_attribute(username, key, value)
def main() -> int:
processed = 0
created = 0
skipped = 0
failures = 0
consecutive_failures = 0
for user in _iter_keycloak_users():
username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
username = username.strip()
if not username:
skipped += 1
continue
enabled = user.get("enabled")
if enabled is False:
skipped += 1
continue
if user.get("serviceAccountClientId") or username.startswith("service-account-"):
skipped += 1
continue
# Fetch the full user payload so we can reliably read attributes (and skip re-invites).
user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or ""
user_id = user_id.strip()
full_user = user
if user_id:
try:
full_user = admin_client().get_user(user_id)
except Exception:
full_user = user
current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
current_synced_ts = _parse_synced_at(current_synced_at)
if current_status in {"rate_limited", "error"} and current_synced_ts:
if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC:
skipped += 1
continue
email = _vaultwarden_email_for_user(full_user)
if not email:
print(f"skip {username}: missing email", file=sys.stderr)
skipped += 1
continue
try:
_set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
except Exception:
pass
# If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
# Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
if current_status in {"invited", "already_present"}:
if not current_synced_at:
try:
_set_user_attribute(
username,
VAULTWARDEN_SYNCED_AT_ATTR,
time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
)
except Exception:
pass
skipped += 1
continue
processed += 1
result = invite_user(email)
if result.ok:
created += 1
consecutive_failures = 0
print(f"ok {username}: {result.status}")
try:
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
_set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
except Exception:
pass
else:
failures += 1
if result.status in {"rate_limited", "error"}:
consecutive_failures += 1
print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
try:
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
_set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
except Exception:
pass
if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT:
print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr)
break
print(
f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
file=sys.stderr,
)
return 0 if failures == 0 else 2
if __name__ == "__main__":
raise SystemExit(main())

View File

@@ -1,86 +0,0 @@
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: vaultwarden-cred-sync
namespace: bstein-dev-home
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "bstein-dev-home"
vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
vault.hashicorp.com/agent-inject-template-portal-env.sh: |
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
spec:
serviceAccountName: bstein-dev-home
restartPolicy: Never
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
imagePullSecrets:
- name: harbor-regcred
containers:
- name: sync
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95
imagePullPolicy: Always
command: ["/bin/sh", "-c"]
args:
- >-
. /vault/secrets/portal-env.sh
&& exec python /scripts/vaultwarden_cred_sync.py
env:
- name: PYTHONPATH
value: /app
- name: KEYCLOAK_ENABLED
value: "true"
- name: KEYCLOAK_REALM
value: atlas
- name: KEYCLOAK_ADMIN_URL
value: http://keycloak.sso.svc.cluster.local
- name: KEYCLOAK_ADMIN_REALM
value: atlas
- name: KEYCLOAK_ADMIN_CLIENT_ID
value: bstein-dev-home-admin
- name: HTTP_CHECK_TIMEOUT_SEC
value: "20"
- name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
value: "900"
- name: VAULTWARDEN_RETRY_COOLDOWN_SEC
value: "1800"
- name: VAULTWARDEN_FAILURE_BAILOUT
value: "2"
volumeMounts:
- name: vaultwarden-cred-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: vaultwarden-cred-sync-script
configMap:
name: vaultwarden-cred-sync-script
defaultMode: 0555

View File

@@ -1,471 +0,0 @@
# services/comms/guest-name-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: guest-name-randomizer
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/1 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
nodeSelector:
hardware: rpi5
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
containers:
- name: rename
image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: MAS_ADMIN_CLIENT_ID
value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM
- name: MAS_ADMIN_CLIENT_SECRET_FILE
value: /vault/secrets/mas-admin-secret
- name: MAS_ADMIN_API_BASE
value: http://matrix-authentication-service:8081/api/admin/v1
- name: MAS_TOKEN_URL
value: http://matrix-authentication-service:8080/oauth2/token
- name: SEEDER_USER
value: othrys-seeder
- name: PGHOST
value: postgres-service.postgres.svc.cluster.local
- name: PGPORT
value: "5432"
- name: PGDATABASE
value: synapse
- name: PGUSER
value: synapse
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
python - <<'PY'
import base64
import os
import random
import requests
import time
import urllib.parse
import psycopg2
ADJ = [
"brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty",
"amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow",
"quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind",
]
NOUN = [
"otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit",
"breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak",
"pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr",
]
BASE = os.environ["SYNAPSE_BASE"]
MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"]
MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"]
MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/")
MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"]
SEEDER_USER = os.environ["SEEDER_USER"]
ROOM_ALIAS = "#othrys:live.bstein.dev"
SERVER_NAME = "live.bstein.dev"
STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000
def mas_admin_token():
with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f:
secret = f.read().strip()
basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode()
last_err = None
for attempt in range(5):
try:
r = requests.post(
MAS_TOKEN_URL,
headers={"Authorization": f"Basic {basic}"},
data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
timeout=30,
)
r.raise_for_status()
return r.json()["access_token"]
except Exception as exc: # noqa: BLE001
last_err = exc
time.sleep(2 ** attempt)
raise last_err
def mas_user_id(token, username):
r = requests.get(
f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}",
headers={"Authorization": f"Bearer {token}"},
timeout=30,
)
r.raise_for_status()
return r.json()["data"]["id"]
def mas_personal_session(token, user_id):
r = requests.post(
f"{MAS_ADMIN_API_BASE}/personal-sessions",
headers={"Authorization": f"Bearer {token}"},
json={
"actor_user_id": user_id,
"human_name": "guest-name-randomizer",
"scope": "urn:matrix:client:api:*",
"expires_in": 300,
},
timeout=30,
)
r.raise_for_status()
data = r.json().get("data", {}).get("attributes", {}) or {}
return data["access_token"], r.json()["data"]["id"]
def mas_revoke_session(token, session_id):
requests.post(
f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke",
headers={"Authorization": f"Bearer {token}"},
json={},
timeout=30,
)
def resolve_alias(token, alias):
headers = {"Authorization": f"Bearer {token}"}
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=headers)
r.raise_for_status()
return r.json()["room_id"]
def room_members(token, room_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members", headers=headers)
r.raise_for_status()
members = set()
existing_names = set()
for ev in r.json().get("chunk", []):
user_id = ev.get("state_key")
if user_id:
members.add(user_id)
disp = (ev.get("content") or {}).get("displayname")
if disp:
existing_names.add(disp)
return members, existing_names
def mas_list_users(token):
headers = {"Authorization": f"Bearer {token}"}
users = []
cursor = None
while True:
url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100"
if cursor:
url += f"&page[after]={urllib.parse.quote(cursor)}"
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
data = r.json().get("data", [])
if not data:
break
users.extend(data)
cursor = data[-1].get("meta", {}).get("page", {}).get("cursor")
if not cursor:
break
return users
def synapse_list_users(token):
headers = {"Authorization": f"Bearer {token}"}
users = []
from_token = None
while True:
url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
if from_token:
url += f"&from={urllib.parse.quote(from_token)}"
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
payload = r.json()
users.extend(payload.get("users", []))
from_token = payload.get("next_token")
if not from_token:
break
return users
def should_prune_guest(entry, now_ms):
if not entry.get("is_guest"):
return False
last_seen = entry.get("last_seen_ts")
if last_seen is None:
return False
try:
last_seen = int(last_seen)
except (TypeError, ValueError):
return False
return now_ms - last_seen > STALE_GUEST_MS
def prune_guest(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
try:
r = requests.delete(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
params={"erase": "true"},
timeout=30,
)
except Exception as exc: # noqa: BLE001
print(f"guest prune failed for {user_id}: {exc}")
return False
if r.status_code in (200, 202, 204, 404):
return True
print(f"guest prune failed for {user_id}: {r.status_code} {r.text}")
return False
def user_id_for_username(username):
return f"@{username}:live.bstein.dev"
def get_displayname(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}", headers=headers)
r.raise_for_status()
return r.json().get("displayname")
def get_displayname_admin(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
timeout=30,
)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json().get("displayname")
def set_displayname(token, room_id, user_id, name, in_room):
headers = {"Authorization": f"Bearer {token}"}
payload = {"displayname": name}
r = requests.put(
f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname",
headers=headers,
json=payload,
)
r.raise_for_status()
if not in_room:
return
state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}"
content = {"membership": "join", "displayname": name}
requests.put(state_url, headers=headers, json=content, timeout=30)
def set_displayname_admin(token, user_id, name):
headers = {"Authorization": f"Bearer {token}"}
payload = {"displayname": name}
r = requests.put(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
json=payload,
timeout=30,
)
if r.status_code in (200, 201, 204):
return True
return False
def needs_rename_username(username):
return username.isdigit() or username.startswith("guest-")
def needs_rename_display(display):
return not display or display.isdigit() or display.startswith("guest-")
def db_rename_numeric(existing_names):
profile_rows = []
profile_index = {}
users = []
conn = psycopg2.connect(
host=os.environ["PGHOST"],
port=int(os.environ["PGPORT"]),
dbname=os.environ["PGDATABASE"],
user=os.environ["PGUSER"],
password=os.environ["PGPASSWORD"],
)
try:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
(f"^@\\d+:{SERVER_NAME}$",),
)
profile_rows = cur.fetchall()
profile_index = {row[1]: row for row in profile_rows}
for user_id, full_user_id, display in profile_rows:
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing_names:
new = candidate
existing_names.add(candidate)
break
if not new:
continue
cur.execute(
"UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
(new, full_user_id),
)
cur.execute(
"SELECT name FROM users WHERE name ~ %s",
(f"^@\\d+:{SERVER_NAME}$",),
)
users = [row[0] for row in cur.fetchall()]
if not users:
return
cur.execute(
"SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
(users,),
)
for existing_full in cur.fetchall():
profile_index.setdefault(existing_full[1], existing_full)
for full_user_id in users:
if full_user_id in profile_index:
continue
localpart = full_user_id.split(":", 1)[0].lstrip("@")
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing_names:
new = candidate
existing_names.add(candidate)
break
if not new:
continue
cur.execute(
"INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
"ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
(localpart, new, full_user_id),
)
finally:
conn.close()
admin_token = mas_admin_token()
seeder_id = mas_user_id(admin_token, SEEDER_USER)
seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id)
try:
room_id = resolve_alias(seeder_token, ROOM_ALIAS)
members, existing = room_members(seeder_token, room_id)
users = mas_list_users(admin_token)
mas_usernames = set()
for user in users:
attrs = user.get("attributes") or {}
username = attrs.get("username") or ""
if username:
mas_usernames.add(username)
legacy_guest = attrs.get("legacy_guest")
if not username:
continue
if not (legacy_guest or needs_rename_username(username)):
continue
user_id = user_id_for_username(username)
access_token, session_id = mas_personal_session(admin_token, user["id"])
try:
display = get_displayname(access_token, user_id)
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing:
new = candidate
existing.add(candidate)
break
if not new:
continue
set_displayname(access_token, room_id, user_id, new, user_id in members)
finally:
mas_revoke_session(admin_token, session_id)
try:
entries = synapse_list_users(seeder_token)
except Exception as exc: # noqa: BLE001
print(f"synapse admin list skipped: {exc}")
entries = []
now_ms = int(time.time() * 1000)
for entry in entries:
user_id = entry.get("name") or ""
if not user_id.startswith("@"):
continue
localpart = user_id.split(":", 1)[0].lstrip("@")
if localpart in mas_usernames:
continue
is_guest = entry.get("is_guest")
if is_guest and should_prune_guest(entry, now_ms):
if prune_guest(seeder_token, user_id):
continue
if not (is_guest or needs_rename_username(localpart)):
continue
display = get_displayname_admin(seeder_token, user_id)
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing:
new = candidate
existing.add(candidate)
break
if not new:
continue
if not set_displayname_admin(seeder_token, user_id, new):
continue
db_rename_numeric(existing)
finally:
mas_revoke_session(admin_token, seeder_session)
PY

View File

@@ -34,11 +34,7 @@ resources:
- livekit-token-deployment.yaml
- livekit.yaml
- coturn.yaml
- seed-othrys-room.yaml
- guest-name-job.yaml
- oneoffs/othrys-kick-numeric-job.yaml
- pin-othrys-job.yaml
- reset-othrys-room-job.yaml
- oneoffs/bstein-force-leave-job.yaml
- livekit-ingress.yaml
- livekit-middlewares.yaml

View File

@@ -1,169 +0,0 @@
# services/comms/pin-othrys-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: pin-othrys-invite
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/30 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: pin
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os, requests, urllib.parse
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
ROOM_ALIAS = "#othrys:live.bstein.dev"
MESSAGE = (
"Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join "
"and choose 'Continue' -> 'Join as guest'."
)
def auth(token): return {"Authorization": f"Bearer {token}"}
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:live.bstein.dev"
def login(user, password):
r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
})
r.raise_for_status()
return r.json()["access_token"]
def resolve(alias, token):
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
r.raise_for_status()
return r.json()["room_id"]
def get_pinned(room_id, token):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
headers=auth(token),
)
if r.status_code == 404:
return []
r.raise_for_status()
return r.json().get("pinned", [])
def get_event(room_id, event_id, token):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
headers=auth(token),
)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
def send(room_id, token, body):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
headers=auth(token),
json={"msgtype": "m.text", "body": body},
)
r.raise_for_status()
return r.json()["event_id"]
def pin(room_id, token, event_id):
r = requests.put(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
headers=auth(token),
json={"pinned": [event_id]},
)
r.raise_for_status()
token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
room_id = resolve(ROOM_ALIAS, token)
for event_id in get_pinned(room_id, token):
ev = get_event(room_id, event_id, token)
if ev and ev.get("content", {}).get("body") == MESSAGE:
raise SystemExit(0)
eid = send(room_id, token, MESSAGE)
pin(room_id, token, eid)
PY
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555

View File

@@ -1,312 +0,0 @@
# services/comms/reset-othrys-room-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: othrys-room-reset
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 0 1 1 *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: reset
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SERVER_NAME
value: live.bstein.dev
- name: ROOM_ALIAS
value: "#othrys:live.bstein.dev"
- name: ROOM_NAME
value: Othrys
- name: PIN_MESSAGE
value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'."
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os
import time
import urllib.parse
import requests
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev")
ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev")
ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys")
PIN_MESSAGE = os.environ["PIN_MESSAGE"]
SEEDER_USER = os.environ["SEEDER_USER"]
SEEDER_PASS = os.environ["SEEDER_PASS"]
BOT_USER = os.environ["BOT_USER"]
POWER_LEVELS = {
"ban": 50,
"events": {
"m.room.avatar": 50,
"m.room.canonical_alias": 50,
"m.room.encryption": 100,
"m.room.history_visibility": 100,
"m.room.name": 50,
"m.room.power_levels": 100,
"m.room.server_acl": 100,
"m.room.tombstone": 100,
},
"events_default": 0,
"historical": 100,
"invite": 50,
"kick": 50,
"m.call.invite": 50,
"redact": 50,
"state_default": 50,
"users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100},
"users_default": 0,
}
def auth(token):
return {"Authorization": f"Bearer {token}"}
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:{SERVER_NAME}"
def login(user, password):
r = requests.post(
f"{AUTH_BASE}/_matrix/client/v3/login",
json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
},
)
if r.status_code != 200:
raise SystemExit(f"login failed: {r.status_code} {r.text}")
return r.json()["access_token"]
def resolve_alias(token, alias):
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()["room_id"]
def create_room(token):
r = requests.post(
f"{BASE}/_matrix/client/v3/createRoom",
headers=auth(token),
json={
"preset": "public_chat",
"name": ROOM_NAME,
"room_version": "11",
},
)
r.raise_for_status()
return r.json()["room_id"]
def put_state(token, room_id, ev_type, content):
r = requests.put(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
headers=auth(token),
json=content,
)
r.raise_for_status()
def set_directory_visibility(token, room_id, visibility):
r = requests.put(
f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
headers=auth(token),
json={"visibility": visibility},
)
r.raise_for_status()
def delete_alias(token, alias):
enc = urllib.parse.quote(alias)
r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
if r.status_code in (200, 202, 404):
return
r.raise_for_status()
def put_alias(token, alias, room_id):
enc = urllib.parse.quote(alias)
r = requests.put(
f"{BASE}/_matrix/client/v3/directory/room/{enc}",
headers=auth(token),
json={"room_id": room_id},
)
r.raise_for_status()
def list_joined_members(token, room_id):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
headers=auth(token),
)
r.raise_for_status()
members = []
for ev in r.json().get("chunk", []):
if ev.get("type") != "m.room.member":
continue
uid = ev.get("state_key")
if not isinstance(uid, str) or not uid.startswith("@"):
continue
members.append(uid)
return members
def invite_user(token, room_id, user_id):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
headers=auth(token),
json={"user_id": user_id},
)
if r.status_code in (200, 202):
return
r.raise_for_status()
def send_message(token, room_id, body):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
headers=auth(token),
json={"msgtype": "m.text", "body": body},
)
r.raise_for_status()
return r.json()["event_id"]
def login_with_retry():
last = None
for attempt in range(1, 6):
try:
return login(SEEDER_USER, SEEDER_PASS)
except Exception as exc: # noqa: BLE001
last = exc
time.sleep(attempt * 2)
raise last
token = login_with_retry()
old_room_id = resolve_alias(token, ROOM_ALIAS)
if not old_room_id:
raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed")
new_room_id = create_room(token)
# Configure the new room.
put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"})
put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS)
# Move the alias.
delete_alias(token, ROOM_ALIAS)
put_alias(token, ROOM_ALIAS, new_room_id)
put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS})
set_directory_visibility(token, new_room_id, "public")
# Invite the bot and all joined members of the old room.
bot_user_id = f"@{BOT_USER}:{SERVER_NAME}"
invite_user(token, new_room_id, bot_user_id)
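# Carry over the old room's members, skipping the seeder itself and numeric
# localparts, which are assumed to be randomized guest accounts.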
for uid in list_joined_members(token, old_room_id):
if uid == f"@{SEEDER_USER}:{SERVER_NAME}":
continue
localpart = uid.split(":", 1)[0].lstrip("@")
if localpart.isdigit():
continue
invite_user(token, new_room_id, uid)
# Pin the guest invite message in the new room.
event_id = send_message(token, new_room_id, PIN_MESSAGE)
put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})
# De-list and tombstone the old room.
set_directory_visibility(token, old_room_id, "private")
put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
put_state(
token,
old_room_id,
"m.room.tombstone",
{"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id},
)
send_message(
token,
old_room_id,
"Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
)
print(f"old_room_id={old_room_id}")
print(f"new_room_id={new_room_id}")
PY
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
View File
@ -1,185 +0,0 @@
# services/comms/seed-othrys-room.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: seed-othrys-room
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/10 * * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: seed
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests pyyaml >/dev/null
python - <<'PY'
import os
import requests
import urllib.parse
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:live.bstein.dev"
def login(user, password):
r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
})
if r.status_code != 200:
raise SystemExit(f"login failed: {r.status_code} {r.text}")
return r.json()["access_token"]
def ensure_user(token, localpart, password, admin):
headers = {"Authorization": f"Bearer {token}"}
user_id = f"@{localpart}:live.bstein.dev"
url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
res = requests.get(url, headers=headers)
if res.status_code == 200:
return
payload = {"password": password, "admin": admin, "deactivated": False}
create = requests.put(url, headers=headers, json=payload)
if create.status_code not in (200, 201):
raise SystemExit(f"create user {user_id} failed: {create.status_code} {create.text}")
def ensure_room(token):
headers = {"Authorization": f"Bearer {token}"}
alias = "#othrys:live.bstein.dev"
alias_enc = "%23othrys%3Alive.bstein.dev"
exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
if exists.status_code == 200:
room_id = exists.json()["room_id"]
else:
create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={
"preset": "public_chat",
"name": "Othrys",
"room_alias_name": "othrys",
"initial_state": [],
"power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50},
})
if create.status_code not in (200, 409):
raise SystemExit(f"create room failed: {create.status_code} {create.text}")
exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
room_id = exists.json()["room_id"]
state_events = [
("m.room.join_rules", {"join_rule": "public"}),
("m.room.guest_access", {"guest_access": "can_join"}),
("m.room.history_visibility", {"history_visibility": "shared"}),
("m.room.canonical_alias", {"alias": alias}),
]
for ev_type, content in state_events:
requests.put(f"{BASE}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}", headers=headers, json=content)
requests.put(f"{BASE}/_matrix/client/v3/directory/list/room/{room_id}", headers=headers, json={"visibility": "public"})
return room_id
def join_user(token, room_id, user_id):
headers = {"Authorization": f"Bearer {token}"}
requests.post(f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", headers=headers, json={"user_id": user_id})
def join_all_locals(token, room_id):
headers = {"Authorization": f"Bearer {token}"}
users = []
from_token = None
while True:
url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
if from_token:
url += f"&from={from_token}"
res = requests.get(url, headers=headers).json()
users.extend([u["name"] for u in res.get("users", [])])
from_token = res.get("next_token")
if not from_token:
break
for uid in users:
join_user(token, room_id, uid)
token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True)
ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False)
room_id = ensure_room(token)
join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev")
join_all_locals(token, room_id)
PY
volumeMounts:
- name: synapse-config
mountPath: /config
readOnly: true
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: synapse-config
secret:
secretName: othrys-synapse-matrix-synapse
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
View File
@ -1,56 +0,0 @@
# services/finance/firefly-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-cron
namespace: finance
spec:
schedule: "0 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-cron-token: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.STATIC_CRON_TOKEN }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: cron
image: curlimages/curl:8.5.0
command: ["/bin/sh", "-c"]
args:
- |
set -eu
token="$(cat /vault/secrets/firefly-cron-token)"
curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}"
View File
@ -1,92 +0,0 @@
# services/finance/firefly-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-user-sync
namespace: finance
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 6 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-env.sh: |
{{ with secret "kv/data/atlas/finance/firefly-db" }}
export DB_CONNECTION="pgsql"
export DB_HOST="{{ .Data.data.DB_HOST }}"
export DB_PORT="{{ .Data.data.DB_PORT }}"
export DB_DATABASE="{{ .Data.data.DB_DATABASE }}"
export DB_USERNAME="{{ .Data.data.DB_USERNAME }}"
export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/finance/firefly-secrets" }}
export APP_KEY="$(cat /vault/secrets/firefly-app-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-db-password: |
{{- with secret "kv/data/atlas/finance/firefly-db" -}}
{{ .Data.data.DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-app-key: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.APP_KEY }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: fireflyiii/core:version-6.4.15
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/firefly-env.sh
exec php /scripts/firefly_user_sync.php
env:
- name: APP_ENV
value: production
- name: APP_DEBUG
value: "false"
- name: TZ
value: Etc/UTC
volumeMounts:
- name: firefly-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: firefly-user-sync-script
configMap:
name: firefly-user-sync-script
defaultMode: 0555
View File
@ -12,8 +12,6 @@ resources:
- oneoffs/finance-secrets-ensure-job.yaml
- actual-budget-deployment.yaml
- firefly-deployment.yaml
- firefly-user-sync-cronjob.yaml
- firefly-cronjob.yaml
- actual-budget-service.yaml
- firefly-service.yaml
- actual-budget-ingress.yaml
@ -24,9 +22,6 @@ configMapGenerator:
- name: actual-openid-bootstrap-script
files:
- actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs
- name: firefly-user-sync-script
files:
- firefly_user_sync.php=scripts/firefly_user_sync.php
- name: finance-secrets-ensure-script
files:
- finance_secrets_ensure.py=scripts/finance_secrets_ensure.py
View File
@ -1,114 +0,0 @@
#!/usr/bin/env php
<?php
declare(strict_types=1);
use FireflyIII\Console\Commands\Correction\CreatesGroupMemberships;
use FireflyIII\Models\Role;
use FireflyIII\Repositories\User\UserRepositoryInterface;
use FireflyIII\Support\Facades\FireflyConfig;
use FireflyIII\User;
use Illuminate\Contracts\Console\Kernel as ConsoleKernel;
function log_line(string $message): void
{
fwrite(STDOUT, $message . PHP_EOL);
}
function error_line(string $message): void
{
fwrite(STDERR, $message . PHP_EOL);
}
function find_app_root(): string
{
$candidates = [];
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
if ($env_root !== '') {
$candidates[] = $env_root;
}
$candidates[] = '/var/www/html';
$candidates[] = '/var/www/firefly-iii';
$candidates[] = '/app';
foreach ($candidates as $candidate) {
if (!is_dir($candidate)) {
continue;
}
if (file_exists($candidate . '/vendor/autoload.php')) {
return $candidate;
}
}
return '';
}
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
$password = (string) getenv('FIREFLY_USER_PASSWORD');
if ($email === '' || $password === '') {
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
exit(1);
}
$root = find_app_root();
if ($root === '') {
error_line('firefly app root not found');
exit(1);
}
$autoload = $root . '/vendor/autoload.php';
$app_bootstrap = $root . '/bootstrap/app.php';
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
error_line('firefly bootstrap files missing');
exit(1);
}
require $autoload;
$app = require $app_bootstrap;
$kernel = $app->make(ConsoleKernel::class);
$kernel->bootstrap();
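// Re-assert single_user_mode so self-registration stays disabled for
// everyone but the first (synced) account.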
try {
FireflyConfig::set('single_user_mode', true);
} catch (Throwable $exc) {
error_line('failed to enforce single_user_mode: '.$exc->getMessage());
}
$repository = $app->make(UserRepositoryInterface::class);
$existing_user = User::where('email', $email)->first();
$first_user = User::count() == 0;
if (!$existing_user) {
$existing_user = User::create(
[
'email' => $email,
'password' => bcrypt($password),
'blocked' => false,
'blocked_code' => null,
]
);
if ($first_user) {
$role = Role::where('name', 'owner')->first();
if ($role) {
$existing_user->roles()->attach($role);
}
}
log_line(sprintf('created firefly user %s', $email));
} else {
log_line(sprintf('updating firefly user %s', $email));
}
$existing_user->blocked = false;
$existing_user->blocked_code = null;
$existing_user->save();
$repository->changePassword($existing_user, $password);
CreatesGroupMemberships::createGroupMembership($existing_user);
log_line('firefly user sync complete');
View File
@ -8,18 +8,8 @@ resources:
- portal-rbac.yaml
- wger-media-pvc.yaml
- wger-static-pvc.yaml
- wger-admin-ensure-cronjob.yaml
- wger-user-sync-cronjob.yaml
- wger-deployment.yaml
- wger-service.yaml
- wger-ingress.yaml
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: wger-nginx-config
files:
- default.conf=config/nginx.conf
- nginx.conf=config/nginx-main.conf
- name: wger-user-sync-script
files:
- wger_user_sync.py=scripts/wger_user_sync.py
View File
@ -1,120 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import sys
import django
def _env(name: str, default: str = "") -> str:
value = os.getenv(name, default)
return value.strip() if isinstance(value, str) else ""
def _setup_django() -> None:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
django.setup()
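# The lookups below are deliberately defensive: optional wger models and
# profile fields vary across releases, so failures are swallowed and skipped.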
def _set_default_gym(user) -> None:
try:
from wger.gym.models import GymConfig
except Exception:
return
try:
config = GymConfig.objects.first()
except Exception:
return
if not config or not getattr(config, "default_gym", None):
return
profile = getattr(user, "userprofile", None)
if not profile or getattr(profile, "gym", None):
return
profile.gym = config.default_gym
profile.save()
def _ensure_profile(user) -> None:
profile = getattr(user, "userprofile", None)
if not profile:
return
if hasattr(profile, "email_verified") and not profile.email_verified:
profile.email_verified = True
if hasattr(profile, "is_temporary") and profile.is_temporary:
profile.is_temporary = False
profile.save()
def _ensure_admin(username: str, password: str, email: str) -> None:
from django.contrib.auth.models import User
if not username or not password:
raise RuntimeError("admin username/password missing")
user, created = User.objects.get_or_create(username=username)
if created:
user.is_active = True
if not user.is_staff:
user.is_staff = True
if email:
user.email = email
user.set_password(password)
user.save()
_ensure_profile(user)
_set_default_gym(user)
print(f"ensured admin user {username}")
def _ensure_user(username: str, password: str, email: str) -> None:
from django.contrib.auth.models import User
if not username or not password:
raise RuntimeError("username/password missing")
user, created = User.objects.get_or_create(username=username)
if created:
user.is_active = True
if email and user.email != email:
user.email = email
user.set_password(password)
user.save()
_ensure_profile(user)
_set_default_gym(user)
action = "created" if created else "updated"
print(f"{action} user {username}")
def main() -> int:
admin_user = _env("WGER_ADMIN_USERNAME")
admin_password = _env("WGER_ADMIN_PASSWORD")
admin_email = _env("WGER_ADMIN_EMAIL")
username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
password = _env("WGER_PASSWORD")
email = _env("WGER_EMAIL")
if not any([admin_user and admin_password, username and password]):
print("no admin or user payload provided; exiting")
return 0
_setup_django()
if admin_user and admin_password:
_ensure_admin(admin_user, admin_password, admin_email)
if username and password:
_ensure_user(username, password, email)
return 0
if __name__ == "__main__":
sys.exit(main())
View File
@ -1,120 +0,0 @@
# services/health/wger-admin-ensure-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-admin-ensure
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "15 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-admin" }}
export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)"
export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-username: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.username }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-password: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.password }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: ensure
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
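# Same script as wger-user-sync: the WGER_ADMIN_* variables exported by the
# Vault template above steer it down the admin-ensure path.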
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555
View File
@ -1,106 +0,0 @@
# services/health/wger-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-user-sync
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555
View File
@ -14,7 +14,6 @@ resources:
- serverstransport.yaml
- ingressroute.yaml
- oneoffs/mailu-sync-job.yaml
- mailu-sync-cronjob.yaml
- front-lb.yaml
configMapGenerator:
View File
@ -1,93 +0,0 @@
# services/mailu/mailu-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: mailu-sync-nightly
namespace: mailu-mailserver
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "30 4 * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "mailu-mailserver"
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret"
vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}}
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
serviceAccountName: mailu-vault-sync
containers:
- name: mailu-sync
image: python:3.11-alpine
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -euo pipefail
. /vault/scripts/mailu_vault_env.sh
pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \
&& python /app/sync.py
env:
- name: KEYCLOAK_BASE_URL
value: http://keycloak.sso.svc.cluster.local
- name: KEYCLOAK_REALM
value: atlas
- name: MAILU_DOMAIN
value: bstein.dev
- name: MAILU_DEFAULT_QUOTA
value: "20000000000"
- name: MAILU_SYSTEM_USERS
value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev"
- name: MAILU_DB_HOST
value: postgres-service.postgres.svc.cluster.local
- name: MAILU_DB_PORT
value: "5432"
volumeMounts:
- name: sync-script
mountPath: /app/sync.py
subPath: sync.py
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 200m
memory: 256Mi
volumes:
- name: sync-script
configMap:
name: mailu-sync-script
defaultMode: 0444
- name: vault-scripts
configMap:
name: mailu-vault-env
defaultMode: 0555
View File
@ -308,9 +308,9 @@ spec:
- name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
value: "0 */4 * * *"
- name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
value: "0 0 1 1 *"
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_VAULT_OIDC
value: "0 0 1 1 *"
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
value: "*/5 * * * *"
- name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
View File
@ -235,7 +235,7 @@
{
"id": 4,
"type": "stat",
"title": "Glue Jobs Stale (>36h)",
"title": "Ariadne Schedules Stale (>36h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -248,7 +248,7 @@
},
"targets": [
{
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
"expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) > bool 129600)) or on() vector(0)",
"refId": "A"
}
],
@ -303,7 +303,7 @@
{
"id": 5,
"type": "stat",
"title": "Glue Jobs Missing Success",
"title": "Ariadne Schedules Missing Success",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -316,7 +316,7 @@
},
"targets": [
{
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
"expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) or on() vector(0)",
"refId": "A"
}
],
@ -363,7 +363,7 @@
{
"id": 6,
"type": "stat",
"title": "Glue Jobs Suspended",
"title": "Ariadne Schedules Failed Last Run",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -376,7 +376,7 @@
},
"targets": [
{
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
"expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"}) > bool 0)) or on() vector(0)",
"refId": "A"
}
],
@ -616,7 +616,7 @@
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -691,7 +691,7 @@
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -753,7 +753,7 @@
{
"id": 12,
"type": "bargauge",
"title": "Glue Jobs Last Success (hours ago)",
"title": "Ariadne Fast Schedule Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -766,9 +766,9 @@
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"legendFormat": "{{task}}",
"instant": true
}
],
@ -828,7 +828,7 @@
{
"id": 13,
"type": "bargauge",
"title": "Glue Jobs Last Schedule (hours ago)",
"title": "Ariadne Fast Schedule Next Run (hours from now)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -841,9 +841,9 @@
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
"expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} - time()) / 3600))",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"legendFormat": "{{task}}",
"instant": true
}
],
@ -1348,6 +1348,6 @@
"tags": [
"atlas",
"jobs",
"glue"
"ariadne"
]
}
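Note on the reworked stat queries above (an explanatory sketch, not part of
the dashboard JSON): 129600 seconds is the 36 h threshold in the panel title,
and "> bool" maps each comparison onto 1 or 0 so sum() counts offending
tasks. For a single task the staleness check reduces to:

    (time() - ariadne_schedule_last_success_timestamp_seconds{task="schedule.nextcloud_sync"}) > bool 129600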
View File
@ -244,7 +244,7 @@ data:
{
"id": 4,
"type": "stat",
"title": "Glue Jobs Stale (>36h)",
"title": "Ariadne Schedules Stale (>36h)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -257,7 +257,7 @@ data:
},
"targets": [
{
"expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1))) or on() vector(0)",
"expr": "sum((((time() - ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) > bool 129600)) or on() vector(0)",
"refId": "A"
}
],
@ -312,7 +312,7 @@ data:
{
"id": 5,
"type": "stat",
"title": "Glue Jobs Missing Success",
"title": "Ariadne Schedules Missing Success",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -325,7 +325,7 @@ data:
},
"targets": [
{
"expr": "count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) or on() vector(0)",
"expr": "count((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} unless on(task) ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"})) or on() vector(0)",
"refId": "A"
}
],
@ -372,7 +372,7 @@ data:
{
"id": 6,
"type": "stat",
"title": "Glue Jobs Suspended",
"title": "Ariadne Schedules Failed Last Run",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -385,7 +385,7 @@ data:
},
"targets": [
{
"expr": "sum((kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1) or on() vector(0)",
"expr": "sum(((1 - ariadne_schedule_last_status{task=~\"^(schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.firefly_user_sync|schedule.comms_guest_name|schedule.comms_seed_room)$\"}) > bool 0)) or on() vector(0)",
"refId": "A"
}
],
@ -625,7 +625,7 @@ data:
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_error_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -700,7 +700,7 @@ data:
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{task}}",
"instant": true
@ -762,7 +762,7 @@ data:
{
"id": 12,
"type": "bargauge",
"title": "Glue Jobs Last Success (hours ago)",
"title": "Ariadne Fast Schedule Last Success (hours ago)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -775,9 +775,9 @@ data:
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
"expr": "sort_desc((time() - max_over_time(ariadne_schedule_last_success_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_seed_room)$\"}[$__range])) / 3600)",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"legendFormat": "{{task}}",
"instant": true
}
],
@ -837,7 +837,7 @@ data:
{
"id": 13,
"type": "bargauge",
"title": "Glue Jobs Last Schedule (hours ago)",
"title": "Ariadne Fast Schedule Next Run (hours from now)",
"datasource": {
"type": "prometheus",
"uid": "atlas-vm"
@ -850,9 +850,9 @@ data:
},
"targets": [
{
"expr": "sort_desc((time() - max_over_time((kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})[$__range])) / 3600)",
"expr": "sort_desc(((ariadne_schedule_next_run_timestamp_seconds{task=~\"^(schedule.mailu_sync|schedule.nextcloud_sync|schedule.nextcloud_cron|schedule.nextcloud_maintenance|schedule.vaultwarden_sync|schedule.wger_user_sync|schedule.wger_admin|schedule.firefly_user_sync|schedule.firefly_cron|schedule.vault_k8s_auth|schedule.vault_oidc|schedule.comms_guest_name|schedule.comms_pin_invite|schedule.comms_reset_room|schedule.comms_seed_room)$\"} - time()) / 3600))",
"refId": "A",
"legendFormat": "{{namespace}}/{{cronjob}}",
"legendFormat": "{{task}}",
"instant": true
}
],
@ -1357,6 +1357,6 @@ data:
"tags": [
"atlas",
"jobs",
"glue"
"ariadne"
]
}
View File
@ -1,113 +0,0 @@
# services/nextcloud-mail-sync/cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: nextcloud-mail-sync
namespace: nextcloud
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 3
failedJobsHistoryLimit: 1
jobTemplate:
spec:
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "nextcloud"
vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db"
vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: |
{{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }}
export POSTGRES_DB="{{ .Data.data.database }}"
export POSTGRES_USER="{{ index .Data.data "db-username" }}"
export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}"
{{ end }}
{{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }}
export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}"
export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}"
{{ end }}
export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}"
export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}"
{{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }}
export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}"
export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
export SMTP_NAME="{{ index .Data.data "apikey" }}"
export SMTP_PASSWORD="{{ index .Data.data "apikey" }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/keycloak-admin" }}
export KC_ADMIN_USER="{{ .Data.data.username }}"
export KC_ADMIN_PASS="{{ .Data.data.password }}"
{{ end }}
spec:
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
restartPolicy: OnFailure
securityContext:
runAsUser: 0
runAsGroup: 0
serviceAccountName: nextcloud-vault
containers:
- name: mail-sync
image: nextcloud:29-apache
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
env:
- name: KC_BASE
value: http://keycloak.sso.svc.cluster.local
- name: KC_REALM
value: atlas
- name: MAILU_DOMAIN
value: bstein.dev
- name: POSTGRES_HOST
value: postgres-service.postgres.svc.cluster.local
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
volumeMounts:
- name: nextcloud-web
mountPath: /var/www/html
- name: nextcloud-config-pvc
mountPath: /var/www/html/config
- name: nextcloud-custom-apps
mountPath: /var/www/html/custom_apps
- name: nextcloud-user-data
mountPath: /var/www/html/data
- name: sync-script
mountPath: /sync/sync.sh
subPath: sync.sh
args:
- |
set -eu
. /vault/secrets/nextcloud-env.sh
exec /sync/sync.sh
volumes:
- name: nextcloud-config-pvc
persistentVolumeClaim:
claimName: nextcloud-config-v2
- name: nextcloud-custom-apps
persistentVolumeClaim:
claimName: nextcloud-custom-apps-v2
- name: nextcloud-user-data
persistentVolumeClaim:
claimName: nextcloud-user-data-v2
- name: nextcloud-web
persistentVolumeClaim:
claimName: nextcloud-web-v2
- name: sync-script
configMap:
name: nextcloud-mail-sync-script
defaultMode: 0755
View File
@ -3,11 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: nextcloud
resources:
- cronjob.yaml
- portal-rbac.yaml
configMapGenerator:
- name: nextcloud-mail-sync-script
files:
- sync.sh=scripts/nextcloud-mail-sync.sh
options:
disableNameSuffixHash: true
View File
@ -1,235 +0,0 @@
#!/bin/bash
set -euo pipefail
KC_BASE="${KC_BASE:?}"
KC_REALM="${KC_REALM:?}"
KC_ADMIN_USER="${KC_ADMIN_USER:?}"
KC_ADMIN_PASS="${KC_ADMIN_PASS:?}"
MAILU_DOMAIN="${MAILU_DOMAIN:?}"
ONLY_USERNAME="${ONLY_USERNAME:-}"
POSTGRES_HOST="${POSTGRES_HOST:-}"
POSTGRES_DB="${POSTGRES_DB:-}"
POSTGRES_USER="${POSTGRES_USER:-}"
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}"
if ! command -v jq >/dev/null 2>&1; then
apt-get update && apt-get install -y jq curl >/dev/null
fi
ensure_psql() {
if command -v psql >/dev/null 2>&1; then
return 0
fi
apt-get update && apt-get install -y postgresql-client >/dev/null
}
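# Flip the Mail compose mode to richtext for the given account ids by
# updating oc_mail_accounts directly over psql.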
set_editor_mode_richtext() {
local ids=("$@")
if [[ ${#ids[@]} -eq 0 ]]; then
return 0
fi
if [[ -z "${POSTGRES_HOST}" || -z "${POSTGRES_DB}" || -z "${POSTGRES_USER}" || -z "${POSTGRES_PASSWORD}" ]]; then
echo "WARN: missing postgres env; cannot update mail editor_mode" >&2
return 0
fi
ensure_psql
local ids_csv
ids_csv=$(IFS=,; echo "${ids[*]}")
PGPASSWORD="${POSTGRES_PASSWORD}" psql \
-h "${POSTGRES_HOST}" \
-U "${POSTGRES_USER}" \
-d "${POSTGRES_DB}" \
-v ON_ERROR_STOP=1 \
-c "UPDATE oc_mail_accounts SET editor_mode='richtext' WHERE id IN (${ids_csv}) AND editor_mode <> 'richtext';" \
>/dev/null
}
list_mail_accounts() {
local user_id="${1}"
local export_out
# Nextcloud Mail does not provide a list command; export is safe (does not print passwords).
if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}"); then
echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2
return 1
fi
awk -v OFS='\t' '
BEGIN { IGNORECASE=1; id="" }
$1 == "Account" { id=$2; sub(":", "", id); next }
$1 == "-" && tolower($2) ~ /^e-?mail:$/ { if (id) print id, $3 }
' <<<"${export_out}" | sort -u
}
token=$(
curl -fsS \
--data-urlencode "grant_type=password" \
--data-urlencode "client_id=admin-cli" \
--data-urlencode "username=${KC_ADMIN_USER}" \
--data-urlencode "password=${KC_ADMIN_PASS}" \
"${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token // empty'
)
if [[ -z "${token}" || "${token}" == "null" ]]; then
echo "Failed to obtain admin token"
exit 1
fi
cd /var/www/html
kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000&briefRepresentation=false"
if [[ -n "${ONLY_USERNAME}" ]]; then
username_q=$(jq -nr --arg v "${ONLY_USERNAME}" '$v|@uri')
kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1&briefRepresentation=false"
fi
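# ONLY_USERNAME narrows the sync to one account for ad-hoc runs, e.g.
# "ONLY_USERNAME=alice /sync/sync.sh" (the user "alice" is hypothetical).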
users=$(curl -fsS -H "Authorization: Bearer ${token}" "${kc_users_url}")
if ! jq -e 'type == "array"' >/dev/null 2>&1 <<<"${users}"; then
echo "ERROR: Keycloak user list is not an array; aborting sync" >&2
exit 1
fi
kc_set_user_mail_meta() {
local user_id="${1}"
local primary_email="${2}"
local mailu_account_count="${3}"
local synced_at="${4}"
# Fetch the full user representation so we don't accidentally clobber attributes.
local user_json updated_json
if ! user_json=$(curl -fsS -H "Authorization: Bearer ${token}" \
"${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}"); then
echo "WARN: unable to fetch Keycloak user ${user_id} for metadata writeback" >&2
return 1
fi
updated_json=$(
jq -c \
--arg primary_email "${primary_email}" \
--arg mailu_account_count "${mailu_account_count}" \
--arg synced_at "${synced_at}" \
'
.attributes = (.attributes // {}) |
.attributes.nextcloud_mail_primary_email = [$primary_email] |
.attributes.nextcloud_mail_account_count = [$mailu_account_count] |
.attributes.nextcloud_mail_synced_at = [$synced_at] |
del(.access)
' <<<"${user_json}"
)
curl -fsS -X PUT \
-H "Authorization: Bearer ${token}" \
-H "Content-Type: application/json" \
-d "${updated_json}" \
"${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}" >/dev/null
}
while read -r user; do
user_id=$(jq -r '.id' <<<"${user}")
username=$(jq -r '.username' <<<"${user}")
keycloak_email=$(echo "${user}" | jq -r '.email // empty')
mailu_email=$(echo "${user}" | jq -r '(.attributes.mailu_email[0] // .attributes.mailu_email // empty)')
app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)')
if [[ -z "${mailu_email}" ]]; then
if [[ -n "${keycloak_email}" && "${keycloak_email,,}" == *"@${MAILU_DOMAIN,,}" ]]; then
mailu_email="${keycloak_email}"
else
mailu_email="${username}@${MAILU_DOMAIN}"
fi
fi
[[ -z "${mailu_email}" || -z "${app_pw}" ]] && continue
if ! accounts=$(list_mail_accounts "${username}"); then
continue
fi
# Manage only internal Mailu-domain accounts; leave any external accounts untouched.
mailu_accounts=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts}" || true)
desired_email="${mailu_email}"
primary_id=""
primary_email=""
if [[ -n "${mailu_accounts}" ]]; then
while IFS=$'\t' read -r account_id account_email; do
if [[ -z "${primary_id}" ]]; then
primary_id="${account_id}"
primary_email="${account_email}"
fi
if [[ "${account_email,,}" == "${desired_email,,}" ]]; then
primary_id="${account_id}"
primary_email="${account_email}"
break
fi
done <<<"${mailu_accounts}"
echo "Updating ${username} mail account ${primary_id} (${primary_email})"
/usr/sbin/runuser -u www-data -- php occ mail:account:update -q "${primary_id}" \
--name "${username}" \
--email "${desired_email}" \
--imap-host mail.bstein.dev \
--imap-port 993 \
--imap-ssl-mode ssl \
--imap-user "${desired_email}" \
--imap-password "${app_pw}" \
--smtp-host mail.bstein.dev \
--smtp-port 587 \
--smtp-ssl-mode tls \
--smtp-user "${desired_email}" \
--smtp-password "${app_pw}" \
--auth-method password >/dev/null 2>&1 || true
# Remove any extra Mailu-domain accounts for this user to prevent duplicates.
while IFS=$'\t' read -r account_id account_email; do
if [[ "${account_id}" == "${primary_id}" ]]; then
continue
fi
echo "Deleting extra mail account ${account_id} (${account_email})"
/usr/sbin/runuser -u www-data -- php occ mail:account:delete -q "${account_id}" >/dev/null 2>&1 || true
done <<<"${mailu_accounts}"
else
echo "Creating mail account for ${username} (${desired_email})"
/usr/sbin/runuser -u www-data -- php occ mail:account:create -q \
"${username}" "${username}" "${desired_email}" \
mail.bstein.dev 993 ssl "${desired_email}" "${app_pw}" \
mail.bstein.dev 587 tls "${desired_email}" "${app_pw}" password >/dev/null 2>&1 || true
fi
# Write non-secret metadata back to Keycloak for UI introspection and onboarding gating.
synced_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
if accounts_after=$(list_mail_accounts "${username}"); then
mailu_accounts_after=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts_after}" || true)
if [[ -n "${mailu_accounts_after}" ]]; then
mailu_account_count=$(printf '%s\n' "${mailu_accounts_after}" | wc -l | tr -d ' ')
else
mailu_account_count="0"
fi
primary_email_after=""
editor_mode_ids=()
if [[ -n "${mailu_accounts_after}" ]]; then
while IFS=$'\t' read -r _account_id account_email; do
editor_mode_ids+=("${_account_id}")
if [[ "${account_email,,}" == "${desired_email,,}" ]]; then
primary_email_after="${account_email}"
break
fi
if [[ -z "${primary_email_after}" ]]; then
primary_email_after="${account_email}"
fi
done <<<"${mailu_accounts_after}"
fi
set_editor_mode_richtext "${editor_mode_ids[@]}"
else
mailu_account_count="0"
primary_email_after=""
fi
kc_set_user_mail_meta "${user_id}" "${primary_email_after}" "${mailu_account_count}" "${synced_at}" || true
done < <(jq -c '.[]' <<<"${users}")
View File
@ -1,48 +0,0 @@
# services/nextcloud/cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: nextcloud-cron
namespace: nextcloud
spec:
schedule: "*/5 * * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
spec:
securityContext:
runAsUser: 33
runAsGroup: 33
fsGroup: 33
restartPolicy: OnFailure
containers:
- name: nextcloud-cron
image: nextcloud:29-apache
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- "cd /var/www/html && php -f cron.php"
volumeMounts:
- name: nextcloud-web
mountPath: /var/www/html
- name: nextcloud-config-pvc
mountPath: /var/www/html/config
- name: nextcloud-custom-apps
mountPath: /var/www/html/custom_apps
- name: nextcloud-user-data
mountPath: /var/www/html/data
volumes:
- name: nextcloud-config-pvc
persistentVolumeClaim:
claimName: nextcloud-config-v2
- name: nextcloud-custom-apps
persistentVolumeClaim:
claimName: nextcloud-custom-apps-v2
- name: nextcloud-user-data
persistentVolumeClaim:
claimName: nextcloud-user-data-v2
- name: nextcloud-web
persistentVolumeClaim:
claimName: nextcloud-web-v2
View File
@ -9,13 +9,5 @@ resources:
- pvc.yaml
- deployment.yaml
- collabora.yaml
- cronjob.yaml
- maintenance-cronjob.yaml
- service.yaml
- ingress.yaml
configMapGenerator:
- name: nextcloud-maintenance-script
files:
- maintenance.sh=scripts/nextcloud-maintenance.sh
options:
disableNameSuffixHash: true
View File
@ -1,98 +0,0 @@
# services/nextcloud/maintenance-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: nextcloud-maintenance
namespace: nextcloud
spec:
schedule: "30 4 * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "nextcloud"
vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db"
vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: |
{{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }}
export POSTGRES_DB="{{ .Data.data.database }}"
export POSTGRES_USER="{{ index .Data.data "db-username" }}"
export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}"
{{ end }}
{{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }}
export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}"
export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}"
{{ end }}
export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}"
export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}"
{{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }}
export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}"
export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/postmark-relay" }}
export SMTP_NAME="{{ index .Data.data "apikey" }}"
export SMTP_PASSWORD="{{ index .Data.data "apikey" }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/keycloak-admin" }}
export KC_ADMIN_USER="{{ .Data.data.username }}"
export KC_ADMIN_PASS="{{ .Data.data.password }}"
{{ end }}
spec:
restartPolicy: OnFailure
securityContext:
runAsUser: 0
runAsGroup: 0
serviceAccountName: nextcloud-vault
containers:
- name: maintenance
image: nextcloud:29-apache
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/nextcloud-env.sh
exec /maintenance/maintenance.sh
env:
- name: NC_URL
value: https://cloud.bstein.dev
volumeMounts:
- name: nextcloud-web
mountPath: /var/www/html
- name: nextcloud-config-pvc
mountPath: /var/www/html/config
- name: nextcloud-custom-apps
mountPath: /var/www/html/custom_apps
- name: nextcloud-user-data
mountPath: /var/www/html/data
- name: maintenance-script
mountPath: /maintenance/maintenance.sh
subPath: maintenance.sh
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
volumes:
- name: nextcloud-config-pvc
persistentVolumeClaim:
claimName: nextcloud-config-v2
- name: nextcloud-custom-apps
persistentVolumeClaim:
claimName: nextcloud-custom-apps-v2
- name: nextcloud-user-data
persistentVolumeClaim:
claimName: nextcloud-user-data-v2
- name: nextcloud-web
persistentVolumeClaim:
claimName: nextcloud-web-v2
- name: maintenance-script
configMap:
name: nextcloud-maintenance-script
defaultMode: 0755

View File

@ -1,108 +0,0 @@
#!/bin/bash
set -euo pipefail
NC_URL="${NC_URL:-https://cloud.bstein.dev}"
ADMIN_USER="${ADMIN_USER:?}"
ADMIN_PASS="${ADMIN_PASS:?}"
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y -qq curl jq >/dev/null
run_occ() {
runuser -u www-data -- php /var/www/html/occ "$@"
}
log() { echo "[$(date -Is)] $*"; }
log "Ensuring Nextcloud app files are present"
if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then
rsync -a --delete \
--exclude config \
--exclude data \
/usr/src/nextcloud/ /var/www/html/
fi
log "Ensuring Nextcloud permissions"
mkdir -p /var/www/html/data
chown 33:33 /var/www/html || true
chmod 775 /var/www/html || true
chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true
log "Applying Atlas theming"
run_occ config:app:set theming name --value "Atlas Cloud"
run_occ config:app:set theming slogan --value "Unified access to Atlas services"
run_occ config:app:set theming url --value "https://cloud.bstein.dev"
run_occ config:app:set theming color --value "#0f172a"
run_occ config:app:set theming disable-user-theming --value "yes"
log "Applying Atlas Mail styling defaults"
run_occ app:install customcss >/dev/null 2>&1 || true
run_occ app:enable customcss >/dev/null 2>&1 || true
MAIL_CSS=$(cat <<'CSS'
.mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table {
font-family: "Inter", "Source Sans 3", "Helvetica Neue", Arial, sans-serif;
font-size: 14px;
line-height: 1.6;
color: var(--color-main-text);
}
.mail-message-body pre {
background: rgba(15, 23, 42, 0.06);
padding: 12px;
border-radius: 8px;
}
.mail-message-body blockquote {
border-left: 3px solid var(--color-border);
padding-left: 12px;
margin: 8px 0;
color: var(--color-text-lighter);
}
.mail-message-body img {
max-width: 100%;
border-radius: 6px;
}
CSS
)
run_occ config:app:set customcss css --value "${MAIL_CSS}" >/dev/null
log "Setting default quota to 250 GB"
run_occ config:app:set files default_quota --value "250 GB"
API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1"
AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true")
log "Removing existing external links"
existing=$(curl -sf "${AUTH[@]}" "${API_BASE}/sites?format=json" | jq -r '.ocs.data[].id // empty')
for id in ${existing}; do
curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null || true
done
SITES=(
"Vaultwarden|https://vault.bstein.dev"
"Jellyfin|https://stream.bstein.dev"
"Gitea|https://scm.bstein.dev"
"Jenkins|https://ci.bstein.dev"
"Harbor|https://registry.bstein.dev"
"Vault|https://secret.bstein.dev"
"Jitsi|https://meet.bstein.dev"
"Grafana|https://metrics.bstein.dev"
"Chat LLM|https://chat.ai.bstein.dev"
"Vision|https://draw.ai.bstein.dev"
"STT/TTS|https://talk.ai.bstein.dev"
)
log "Seeding external links"
for entry in "${SITES[@]}"; do
IFS="|" read -r name url <<<"${entry}"
curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \
-d "name=${name}" \
-d "url=${url}" \
-d "lang=" \
-d "type=link" \
-d "device=" \
-d "icon=" \
-d "groups[]=" \
-d "redirect=1" >/dev/null
done
log "Maintenance run completed"

View File

@ -1,55 +0,0 @@
# services/vault/k8s-auth-config-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: vault-k8s-auth-config
namespace: vault
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
suspend: false
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: vault-admin
restartPolicy: Never
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: configure-k8s-auth
image: hashicorp/vault:1.17.6
imagePullPolicy: IfNotPresent
command:
- sh
- /scripts/vault_k8s_auth_configure.sh
env:
- name: VAULT_ADDR
value: http://10.43.57.249:8200
- name: VAULT_K8S_ROLE
value: vault-admin
- name: VAULT_K8S_TOKEN_REVIEWER_JWT_FILE
value: /var/run/secrets/vault-token-reviewer/token
- name: VAULT_K8S_ROLE_TTL
value: 1h
volumeMounts:
- name: k8s-auth-config-script
mountPath: /scripts
readOnly: true
- name: token-reviewer
mountPath: /var/run/secrets/vault-token-reviewer
readOnly: true
volumes:
- name: k8s-auth-config-script
configMap:
name: vault-k8s-auth-config-script
defaultMode: 0555
- name: token-reviewer
secret:
secretName: vault-admin-token-reviewer

View File

@ -10,21 +10,9 @@ resources:
- rbac.yaml
- configmap.yaml
- statefulset.yaml
- k8s-auth-config-cronjob.yaml
- oidc-config-cronjob.yaml
- service.yaml
- ingress.yaml
- certificate.yaml
- serverstransport.yaml
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: vault-oidc-config-script
files:
- vault_oidc_configure.sh=scripts/vault_oidc_configure.sh
- name: vault-k8s-auth-config-script
files:
- vault_k8s_auth_configure.sh=scripts/vault_k8s_auth_configure.sh
- name: vault-entrypoint
files:
- vault-entrypoint.sh=scripts/vault-entrypoint.sh

View File

@ -1,83 +0,0 @@
# services/vault/oidc-config-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: vault-oidc-config
namespace: vault
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/15 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "vault-admin"
vault.hashicorp.com/agent-inject-secret-vault-oidc-env.sh: "kv/data/atlas/vault/vault-oidc-config"
vault.hashicorp.com/agent-inject-template-vault-oidc-env.sh: |
{{ with secret "kv/data/atlas/vault/vault-oidc-config" }}
export VAULT_OIDC_DISCOVERY_URL="{{ .Data.data.discovery_url }}"
export VAULT_OIDC_CLIENT_ID="{{ .Data.data.client_id }}"
export VAULT_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}"
export VAULT_OIDC_DEFAULT_ROLE="{{ .Data.data.default_role }}"
export VAULT_OIDC_SCOPES="{{ .Data.data.scopes }}"
export VAULT_OIDC_USER_CLAIM="{{ .Data.data.user_claim }}"
export VAULT_OIDC_GROUPS_CLAIM="{{ .Data.data.groups_claim }}"
export VAULT_OIDC_TOKEN_POLICIES="{{ .Data.data.token_policies }}"
export VAULT_OIDC_ADMIN_GROUP="{{ .Data.data.admin_group }}"
export VAULT_OIDC_ADMIN_POLICIES="{{ .Data.data.admin_policies }}"
export VAULT_OIDC_DEV_GROUP="{{ .Data.data.dev_group }}"
export VAULT_OIDC_DEV_POLICIES="{{ .Data.data.dev_policies }}"
export VAULT_OIDC_USER_GROUP="{{ .Data.data.user_group }}"
export VAULT_OIDC_USER_POLICIES="{{ .Data.data.user_policies }}"
export VAULT_OIDC_REDIRECT_URIS="{{ .Data.data.redirect_uris }}"
export VAULT_OIDC_BOUND_AUDIENCES="{{ .Data.data.bound_audiences }}"
export VAULT_OIDC_BOUND_CLAIMS="{{ .Data.data.bound_claims }}"
export VAULT_OIDC_BOUND_CLAIMS_TYPE="{{ .Data.data.bound_claims_type }}"
{{ end }}
spec:
serviceAccountName: vault-admin
restartPolicy: Never
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: configure-oidc
image: hashicorp/vault:1.17.6
imagePullPolicy: IfNotPresent
command:
- /entrypoint.sh
args:
- sh
- /scripts/vault_oidc_configure.sh
env:
- name: VAULT_ADDR
value: http://10.43.57.249:8200
- name: VAULT_K8S_ROLE
value: vault-admin
- name: VAULT_ENV_FILE
value: /vault/secrets/vault-oidc-env.sh
volumeMounts:
- name: vault-entrypoint
mountPath: /entrypoint.sh
subPath: vault-entrypoint.sh
- name: oidc-config-script
mountPath: /scripts
readOnly: true
volumes:
- name: vault-entrypoint
configMap:
name: vault-entrypoint
defaultMode: 0755
- name: oidc-config-script
configMap:
name: vault-oidc-config-script
defaultMode: 0555

View File

@ -1,34 +0,0 @@
#!/bin/sh
set -eu
if [ -n "${VAULT_ENV_FILE:-}" ]; then
if [ -f "${VAULT_ENV_FILE}" ]; then
# shellcheck disable=SC1090
. "${VAULT_ENV_FILE}"
else
echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2
exit 1
fi
fi
if [ -n "${VAULT_COPY_FILES:-}" ]; then
old_ifs="$IFS"
IFS=','
for pair in ${VAULT_COPY_FILES}; do
src="${pair%%:*}"
dest="${pair#*:}"
if [ -z "${src}" ] || [ -z "${dest}" ]; then
echo "Vault copy entry malformed: ${pair}" >&2
exit 1
fi
if [ ! -f "${src}" ]; then
echo "Vault file not found: ${src}" >&2
exit 1
fi
mkdir -p "$(dirname "${dest}")"
cp "${src}" "${dest}"
done
IFS="$old_ifs"
fi
exec "$@"

View File

@ -1,259 +0,0 @@
#!/usr/bin/env sh
set -eu
log() { echo "[vault-k8s-auth] $*"; }
vault_cmd() {
for attempt in 1 2 3 4 5 6; do
set +e
output="$(vault "$@" 2>&1)"
status=$?
set -e
if [ "${status}" -eq 0 ]; then
printf '%s' "${output}"
return 0
fi
log "vault command failed; retrying (${attempt}/6)"
sleep $((attempt * 2))
done
log "vault command failed; giving up"
return 1
}
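# Note: vault_cmd retries with linear backoff (2s, 4s, ..., 12s; ~42s worst case).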
ensure_token() {
if [ -n "${VAULT_TOKEN:-}" ]; then
return
fi
role="${VAULT_K8S_ROLE:-vault}"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then
log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}"
exit 1
fi
export VAULT_TOKEN
}
# "vault status" exits 2 when sealed, which the retry wrapper would misread as
# six failed attempts, so call it directly and keep the JSON for inspection.
set +e
status_json="$(vault status -format=json 2>&1)"
status_rc=$?
set -e
if [ "${status_rc}" -ne 0 ] && [ "${status_rc}" -ne 2 ]; then
log "vault status failed; check VAULT_ADDR and VAULT_TOKEN"
exit 1
fi
if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then
log "vault not initialized; skipping"
exit 0
fi
if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then
log "vault sealed; skipping"
exit 0
fi
ensure_token
k8s_host="https://${KUBERNETES_SERVICE_HOST}:443"
k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)"
k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
role_ttl="${VAULT_K8S_ROLE_TTL:-1h}"
token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}"
if [ -z "${token_reviewer_jwt}" ] && [ -n "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE:-}" ] && [ -r "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}" ]; then
token_reviewer_jwt="$(cat "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}")"
fi
if [ -z "${token_reviewer_jwt}" ]; then
token_reviewer_jwt="${k8s_token}"
fi
if ! vault_cmd auth list -format=json | grep -q '"kubernetes/"'; then
log "enabling kubernetes auth"
vault_cmd auth enable kubernetes
fi
log "configuring kubernetes auth"
vault_cmd write auth/kubernetes/config \
token_reviewer_jwt="${token_reviewer_jwt}" \
kubernetes_host="${k8s_host}" \
kubernetes_ca_cert="${k8s_ca}"
write_raw_policy() {
name="$1"
body="$2"
log "writing policy ${name}"
printf '%s\n' "${body}" | vault_cmd policy write "${name}" -
}
write_policy_and_role() {
role="$1"
namespace="$2"
service_accounts="$3"
read_paths="$4"
write_paths="$5"
policy_body=""
for path in ${read_paths}; do
policy_body="${policy_body}
path \"kv/data/atlas/${path}\" {
capabilities = [\"read\"]
}
path \"kv/metadata/atlas/${path}\" {
capabilities = [\"list\"]
}
"
done
for path in ${write_paths}; do
policy_body="${policy_body}
path \"kv/data/atlas/${path}\" {
capabilities = [\"create\", \"update\", \"read\"]
}
path \"kv/metadata/atlas/${path}\" {
capabilities = [\"list\"]
}
"
done
log "writing policy ${role}"
printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -
log "writing role ${role}"
vault_cmd write "auth/kubernetes/role/${role}" \
bound_service_account_names="${service_accounts}" \
bound_service_account_namespaces="${namespace}" \
policies="${role}" \
ttl="${role_ttl}"
}
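# Example: a read path of "gitea/*" renders as
#   path "kv/data/atlas/gitea/*"     { capabilities = ["read"] }
#   path "kv/metadata/atlas/gitea/*" { capabilities = ["list"] }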
vault_admin_policy='
path "sys/auth" {
capabilities = ["read"]
}
path "sys/auth/*" {
capabilities = ["create", "update", "delete", "sudo", "read"]
}
path "auth/kubernetes/*" {
capabilities = ["create", "update", "read"]
}
path "auth/oidc/*" {
capabilities = ["create", "update", "read"]
}
path "sys/policies/acl" {
capabilities = ["list"]
}
path "sys/policies/acl/*" {
capabilities = ["create", "update", "read"]
}
path "sys/internal/ui/mounts" {
capabilities = ["read"]
}
path "sys/mounts" {
capabilities = ["read"]
}
path "sys/mounts/auth/*" {
capabilities = ["read", "update", "sudo"]
}
path "kv/data/atlas/vault/*" {
capabilities = ["read"]
}
path "kv/metadata/atlas/vault/*" {
capabilities = ["list"]
}
path "kv/data/*" {
capabilities = ["create", "update", "read", "delete", "patch"]
}
path "kv/metadata" {
capabilities = ["list"]
}
path "kv/metadata/*" {
capabilities = ["read", "list", "delete"]
}
path "kv/data/atlas/shared/*" {
capabilities = ["create", "update", "read", "patch"]
}
path "kv/metadata/atlas/shared/*" {
capabilities = ["list"]
}
'
write_raw_policy "vault-admin" "${vault_admin_policy}"
dev_kv_policy='
path "kv/metadata" {
capabilities = ["list"]
}
path "kv/metadata/atlas" {
capabilities = ["list"]
}
path "kv/metadata/atlas/shared" {
capabilities = ["list"]
}
path "kv/metadata/atlas/shared/*" {
capabilities = ["list"]
}
path "kv/data/atlas/shared/*" {
capabilities = ["read"]
}
'
write_raw_policy "dev-kv" "${dev_kv_policy}"
log "writing role vault-admin"
vault_cmd write "auth/kubernetes/role/vault-admin" \
bound_service_account_names="vault-admin,ariadne" \
bound_service_account_namespaces="vault,maintenance" \
policies="vault-admin" \
ttl="${role_ttl}"
write_policy_and_role "outline" "outline" "outline-vault" \
"outline/* shared/postmark-relay" ""
write_policy_and_role "planka" "planka" "planka-vault" \
"planka/* shared/postmark-relay" ""
write_policy_and_role "bstein-dev-home" "bstein-dev-home" "bstein-dev-home,bstein-dev-home-vault-sync" \
"portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay mailu/mailu-initial-account-secret shared/harbor-pull" ""
write_policy_and_role "gitea" "gitea" "gitea-vault" \
"gitea/*" ""
write_policy_and_role "vaultwarden" "vaultwarden" "vaultwarden-vault" \
"vaultwarden/* mailu/mailu-initial-account-secret" ""
write_policy_and_role "sso" "sso" "sso-vault,sso-vault-sync,mas-secrets-ensure" \
"sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin shared/portal-e2e-client shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "mailu-mailserver" "mailu-mailserver" "mailu-vault-sync" \
"mailu/* shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "harbor" "harbor" "harbor-vault-sync" \
"harbor/* shared/harbor-pull" ""
write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \
"nextcloud/* shared/keycloak-admin shared/postmark-relay" ""
write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \
"comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \
"jenkins/* shared/harbor-pull" ""
write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \
"monitoring/* shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "logging" "logging" "logging-vault-sync" \
"logging/* shared/harbor-pull" ""
write_policy_and_role "pegasus" "jellyfin" "pegasus-vault-sync" \
"pegasus/* shared/harbor-pull" ""
write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
"crypto/* shared/harbor-pull" ""
write_policy_and_role "health" "health" "health-vault-sync" \
"health/*" ""
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
"maintenance/ariadne-db maintenance/metis-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" ""
write_policy_and_role "maintenance-metis-token-sync" "maintenance" "metis-token-sync" \
"" \
"maintenance/metis-runtime"
write_policy_and_role "finance" "finance" "finance-vault" \
"finance/* shared/postmark-relay" ""
write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \
"" \
"finance/*"
write_policy_and_role "longhorn" "longhorn-system" "longhorn-vault,longhorn-vault-sync" \
"longhorn/* shared/harbor-pull" ""
write_policy_and_role "postgres" "postgres" "postgres-vault" \
"postgres/postgres-db" ""
write_policy_and_role "vault" "vault" "vault" \
"vault/*" ""
write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
"shared/keycloak-admin maintenance/metis-ssh-keys" \
"harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/metis-ssh-keys"
write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \
"" \
"crypto/wallet-monero-temp-rpc-auth"
write_policy_and_role "comms-secrets" "comms" \
"comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job" \
"" \
"comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin comms/synapse-registration comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey"

View File

@ -1,166 +0,0 @@
#!/usr/bin/env sh
set -eu
log() { echo "[vault-oidc] $*"; }
vault_cmd() {
for attempt in 1 2 3 4 5 6; do
set +e
output="$(vault "$@" 2>&1)"
status=$?
set -e
if [ "${status}" -eq 0 ]; then
printf '%s' "${output}"
return 0
fi
log "vault command failed; retrying (${attempt}/6)"
sleep $((attempt * 2))
done
log "vault command failed; giving up"
return 1
}
ensure_token() {
if [ -n "${VAULT_TOKEN:-}" ]; then
return
fi
role="${VAULT_K8S_ROLE:-vault}"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then
log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}"
exit 1
fi
export VAULT_TOKEN
}
# "vault status" exits 2 when sealed, which the retry wrapper would misread as
# six failed attempts, so call it directly and keep the JSON for inspection.
set +e
status_json="$(vault status -format=json 2>&1)"
status_rc=$?
set -e
if [ "${status_rc}" -ne 0 ] && [ "${status_rc}" -ne 2 ]; then
log "vault status failed; check VAULT_ADDR and VAULT_TOKEN"
exit 1
fi
if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then
log "vault not initialized; skipping"
exit 0
fi
if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then
log "vault sealed; skipping"
exit 0
fi
ensure_token
: "${VAULT_OIDC_DISCOVERY_URL:?set VAULT_OIDC_DISCOVERY_URL}"
: "${VAULT_OIDC_CLIENT_ID:?set VAULT_OIDC_CLIENT_ID}"
: "${VAULT_OIDC_CLIENT_SECRET:?set VAULT_OIDC_CLIENT_SECRET}"
default_role="${VAULT_OIDC_DEFAULT_ROLE:-admin}"
scopes="${VAULT_OIDC_SCOPES:-openid profile email groups}"
user_claim="${VAULT_OIDC_USER_CLAIM:-preferred_username}"
groups_claim="${VAULT_OIDC_GROUPS_CLAIM:-groups}"
redirect_uris="${VAULT_OIDC_REDIRECT_URIS:-https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback}"
bound_audiences="${VAULT_OIDC_BOUND_AUDIENCES:-${VAULT_OIDC_CLIENT_ID}}"
bound_claims_type="${VAULT_OIDC_BOUND_CLAIMS_TYPE:-string}"
bound_claims_type="$(printf '%s' "${bound_claims_type}" | tr -d '[:space:]')"
if [ -z "${bound_claims_type}" ] || [ "${bound_claims_type}" = "<novalue>" ]; then
bound_claims_type="string"
fi
admin_group="${VAULT_OIDC_ADMIN_GROUP:-admin}"
admin_policies="${VAULT_OIDC_ADMIN_POLICIES:-default,vault-admin}"
dev_group="${VAULT_OIDC_DEV_GROUP:-dev}"
dev_policies="${VAULT_OIDC_DEV_POLICIES:-default,dev-kv}"
user_group="${VAULT_OIDC_USER_GROUP:-${dev_group}}"
user_policies="${VAULT_OIDC_USER_POLICIES:-${VAULT_OIDC_TOKEN_POLICIES:-${dev_policies}}}"
if ! vault_cmd auth list -format=json | grep -q '"oidc/"'; then
log "enabling oidc auth method"
vault_cmd auth enable oidc
fi
log "configuring oidc auth"
vault_cmd write auth/oidc/config \
oidc_discovery_url="${VAULT_OIDC_DISCOVERY_URL}" \
oidc_client_id="${VAULT_OIDC_CLIENT_ID}" \
oidc_client_secret="${VAULT_OIDC_CLIENT_SECRET}" \
default_role="${default_role}"
vault_cmd auth tune -listing-visibility=unauth oidc >/dev/null
build_bound_claims() {
claim="$1"
groups="$2"
json="{\"${claim}\":["
first=1
old_ifs=$IFS
IFS=,
for item in $groups; do
item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
if [ -z "${item}" ]; then
continue
fi
if [ "${first}" -eq 0 ]; then
json="${json},"
fi
json="${json}\"${item}\""
first=0
done
IFS=$old_ifs
json="${json}]}"
printf '%s' "${json}"
}
build_json_array() {
items="$1"
json="["
first=1
old_ifs=$IFS
IFS=,
for item in $items; do
item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
if [ -z "${item}" ]; then
continue
fi
if [ "${first}" -eq 0 ]; then
json="${json},"
fi
json="${json}\"${item}\""
first=0
done
IFS=$old_ifs
json="${json}]"
printf '%s' "${json}"
}
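# Examples: build_bound_claims groups "admin, dev" -> {"groups":["admin","dev"]}
#           build_json_array "a, b"                -> ["a","b"]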
configure_role() {
role_name="$1"
role_groups="$2"
role_policies="$3"
if [ -z "${role_name}" ] || [ -z "${role_groups}" ] || [ -z "${role_policies}" ]; then
log "skipping role ${role_name} (missing groups or policies)"
return
fi
claims="$(build_bound_claims "${groups_claim}" "${role_groups}")"
scopes_csv="$(printf '%s' "${scopes}" | tr ' ' ',' | tr -s ',' | sed 's/^,//;s/,$//')"
redirect_json="$(build_json_array "${redirect_uris}")"
payload_file="$(mktemp)"
cat > "${payload_file}" <<EOF
{
"user_claim": "${user_claim}",
"oidc_scopes": "${scopes_csv}",
"token_policies": "${role_policies}",
"bound_audiences": "${bound_audiences}",
"bound_claims": ${claims},
"bound_claims_type": "${bound_claims_type}",
"groups_claim": "${groups_claim}",
"allowed_redirect_uris": ${redirect_json}
}
EOF
log "configuring oidc role ${role_name}"
vault_cmd write "auth/oidc/role/${role_name}" @"${payload_file}"
rm -f "${payload_file}"
}
configure_role "admin" "${admin_group}" "${admin_policies}"
configure_role "dev" "${dev_group}" "${dev_policies}"
configure_role "user" "${user_group}" "${user_policies}"