Compare commits

..

414 Commits

Author SHA1 Message Date
6e4cafa3df maintenance: harden metis recovery and fix harbor rollout 2026-03-31 14:51:49 -03:00
41021c472b maintenance/jenkins: align Metis ingress, sentinel push, and CI job 2026-03-31 14:21:53 -03:00
17afb0bb55 maintenance: add Metis service and sentinel manifests 2026-03-31 14:07:17 -03:00
1e0e73a28f monitoring: combine Ariadne and Metis tests 2026-03-31 13:54:04 -03:00
af01a620c3 monitoring: roll grafana to apply latest alert rules 2026-03-30 18:41:21 -03:00
0edc513e2e monitoring: raise rootfs warning threshold to 85 percent 2026-03-30 18:40:59 -03:00
3659c9c07b maintenance: unblock sweeper rollouts on degraded nodes 2026-03-30 18:39:05 -03:00
11d58dccb7 maintenance: run image sweeper periodically for sd safety 2026-03-30 18:36:25 -03:00
5bcff5f405 monitoring: tame email noise and harden postmark alerts 2026-03-30 18:32:22 -03:00
f5dcea860e atlasbot: wire context and timeout fallbacks 2026-03-30 16:55:19 -03:00
a1e90f4600 atlasbot: wire quick smart genius modes 2026-03-30 16:51:23 -03:00
f04f032721 longhorn: avoid webhook deadlock and forced image pulls 2026-03-30 10:16:42 -03:00
083999c84c comms: harden matrix auth ingress routes for MAS 2026-03-30 08:21:19 -03:00
dc62a84e2e flux: keep feature branch tracking until main push is available 2026-03-30 07:57:13 -03:00
31ffaedf2a flux: target main branch for sync and image automation 2026-03-30 07:48:47 -03:00
b2d1dc4e3f flux: return sync and image automation branches to master 2026-03-30 07:48:09 -03:00
flux-bot
271a941d89 chore(atlasbot): automated image update 2026-03-30 10:47:00 +00:00
flux-bot
fa30a2cade chore(atlasbot): automated image update 2026-03-30 07:10:35 +00:00
f71d0bc3f3 atlasbot: switch quick mode to 7b fast model 2026-03-30 04:07:08 -03:00
flux-bot
19a3207eac chore(atlasbot): automated image update 2026-03-30 07:04:35 +00:00
2d5107f7e2 bstein-dev-home: deploy backend image 0.1.1-123 2026-03-30 03:54:39 -03:00
a091ea75a3 atlasbot: deploy matrix timeout fix image 0.1.2-103 2026-03-30 03:51:30 -03:00
95dabf5df8 atlasbot: disable ollama retries for strict quick budgets 2026-03-30 03:50:59 -03:00
flux-bot
311cec8adf chore(bstein-dev-home): automated image update 2026-03-30 06:46:11 +00:00
flux-bot
b18e355412 chore(atlasbot): automated image update 2026-03-30 06:45:32 +00:00
flux-bot
80057210fc chore(bstein-dev-home): automated image update 2026-03-30 06:38:10 +00:00
flux-bot
7a1e99a95e chore(bstein-dev-home): automated image update 2026-03-30 06:34:10 +00:00
flux-bot
ace86ad736 chore(bstein-dev-home): automated image update 2026-03-30 06:29:09 +00:00
flux-bot
2a4deb6dd1 chore(atlasbot): automated image update 2026-03-30 06:25:30 +00:00
flux-bot
eee5456921 chore(atlasbot): automated image update 2026-03-30 05:55:27 +00:00
f86d3a4c00 atlasbot: cap quick runtime and expose genius model to portal 2026-03-30 02:53:06 -03:00
a6b77c68f0 maintenance: grant ariadne auth-delegator 2026-02-08 09:55:20 -03:00
9599b4c975 ariadne: use vault-admin role for vault config 2026-02-07 22:34:10 -03:00
df96c06fa2 ariadne: run image sweeper daily 2026-02-07 11:11:41 -03:00
e575e6cb1e gitea: prefer rpi5 nodes 2026-02-07 11:07:02 -03:00
flux-bot
bca66c5d71 chore(maintenance): automated image update 2026-02-07 13:56:49 +00:00
b2affe091d maintenance: align vault role env 2026-02-07 10:51:20 -03:00
flux-bot
6c7f2112c2 chore(atlasbot): automated image update 2026-02-07 13:50:38 +00:00
a4874163ec infra: bias gitea/monerod placement, bump synapse ensure job 2026-02-07 10:48:48 -03:00
079f8efbb9 comms: run synapse admin ensure (admin flag) 2026-02-07 10:30:34 -03:00
95228b75ab comms: ensure synapse admin flag; ariadne vault role 2026-02-07 10:28:55 -03:00
9e75bf0b42 ariadne: accelerate schedules for alert clearing 2026-02-07 03:23:42 -03:00
b2841985ef comms: re-suspend synapse admin job 2026-02-07 03:19:42 -03:00
9553995ba5 comms: run synapse admin ensure 2026-02-07 03:16:44 -03:00
e840777668 vault: allow maintenance auth sync 2026-02-07 03:13:53 -03:00
718a1ca312 crypto: run xmrig only on rpi5 2026-02-06 23:34:31 -03:00
55f0347b70 comms: suspend synapse admin ensure 2026-02-06 20:21:01 -03:00
f77e13b2cb comms: run synapse admin ensure with python image 2026-02-06 20:13:02 -03:00
fd2b10d00d comms: run synapse admin ensure 2026-02-06 20:01:38 -03:00
4209299a40 jenkins: add dind cache pvc 2026-02-06 20:00:01 -03:00
1804ff06c6 gitea: avoid longhorn nodes 2026-02-06 19:33:55 -03:00
4b5913827d maintenance: pivot soteria to longhorn 2026-02-06 18:38:29 -03:00
80548a2e82 longhorn: add b2 backup target 2026-02-06 18:28:37 -03:00
flux-bot
29756b1e62 chore(maintenance): automated image update 2026-02-06 21:27:42 +00:00
4bc91c40f6 maintenance: restore soteria job node selector 2026-02-06 04:19:36 -03:00
1260d18cdf maintenance: pin soteria jobs to titan-24 for backup 2026-02-06 04:15:58 -03:00
47efd0be06 maintenance: pin soteria jobs to arm64 workers 2026-02-06 04:10:55 -03:00
flux-bot
fa410c8f1e chore(maintenance): automated image update 2026-02-06 07:10:04 +00:00
0ed75718c2 maintenance: remove restic init job 2026-02-06 03:50:30 -03:00
50ff59a33b maintenance: add restic init job 2026-02-06 03:48:45 -03:00
flux-bot
9d9bcd1988 chore(maintenance): automated image update 2026-02-05 18:56:27 +00:00
flux-bot
c96749bab6 chore(maintenance): automated image update 2026-02-05 18:45:20 +00:00
5e239accbd maintenance: schedule soteria on rpi workers 2026-02-05 15:30:09 -03:00
flux-bot
c50298c8fe chore(bstein-dev-home): automated image update 2026-02-05 18:24:54 +00:00
flux-bot
3fcab34b7d chore(maintenance): automated image update 2026-02-05 18:24:44 +00:00
e223ef8e76 harbor: route registry traffic via core 2026-02-05 15:23:42 -03:00
7f72683242 harbor: wire registryctl notification auth 2026-02-05 15:17:54 -03:00
eeb8475848 harbor: fix registry notification URL 2026-02-05 15:00:43 -03:00
839b79696c harbor: restore registry notifications env 2026-02-05 14:50:53 -03:00
920f146efb harbor: enable registry notifications 2026-02-05 14:44:09 -03:00
flux-bot
c2c5474bc8 chore(atlasbot): automated image update 2026-02-05 17:38:26 +00:00
flux-bot
eab7ed5cff chore(maintenance): automated image update 2026-02-05 17:04:24 +00:00
flux-bot
22eb1a1159 chore(maintenance): automated image update 2026-02-05 16:32:49 +00:00
d7c1ecd098 maintenance: move soteria image to bstein 2026-02-05 13:12:03 -03:00
flux-bot
96288c9fdd chore(atlasbot): automated image update 2026-02-05 15:58:19 +00:00
flux-bot
a71bf7d9d5 chore(atlasbot): automated image update 2026-02-05 01:26:05 +00:00
533baa6d0c atlasbot: set genius model env 2026-02-04 19:39:43 -03:00
flux-bot
cee353e305 chore(atlasbot): automated image update 2026-02-04 22:15:47 +00:00
flux-bot
436d24ea70 chore(atlasbot): automated image update 2026-02-04 21:45:45 +00:00
flux-bot
6fb80e37e8 chore(atlasbot): automated image update 2026-02-04 21:39:45 +00:00
flux-bot
132e73100f chore(atlasbot): automated image update 2026-02-04 19:08:32 +00:00
flux-bot
fe8cc40903 chore(atlasbot): automated image update 2026-02-04 18:09:26 +00:00
flux-bot
947a43e630 chore(atlasbot): automated image update 2026-02-04 18:03:26 +00:00
flux-bot
31679b59f5 chore(atlasbot): automated image update 2026-02-04 17:56:26 +00:00
flux-bot
77b81e1e9a chore(atlasbot): automated image update 2026-02-04 17:49:23 +00:00
flux-bot
6523e45b3f chore(atlasbot): automated image update 2026-02-04 17:30:22 +00:00
flux-bot
49414c6cca chore(atlasbot): automated image update 2026-02-04 17:23:23 +00:00
flux-bot
6efa280e9d chore(atlasbot): automated image update 2026-02-04 17:20:23 +00:00
flux-bot
ff81cfdb82 chore(atlasbot): automated image update 2026-02-04 17:14:21 +00:00
flux-bot
c4b0250321 chore(atlasbot): automated image update 2026-02-04 17:07:21 +00:00
flux-bot
c1a8aa43d6 chore(atlasbot): automated image update 2026-02-04 17:00:21 +00:00
flux-bot
0275adb5b7 chore(atlasbot): automated image update 2026-02-04 16:53:20 +00:00
flux-bot
663143660b chore(atlasbot): automated image update 2026-02-04 16:45:19 +00:00
flux-bot
cb25cf7571 chore(atlasbot): automated image update 2026-02-04 16:39:18 +00:00
flux-bot
33127dde26 chore(atlasbot): automated image update 2026-02-04 14:03:05 +00:00
flux-bot
dc214cee79 chore(atlasbot): automated image update 2026-02-04 03:27:09 +00:00
flux-bot
4395986b0c chore(atlasbot): automated image update 2026-02-04 03:01:07 +00:00
flux-bot
fba7fe9029 chore(atlasbot): automated image update 2026-02-04 02:54:06 +00:00
flux-bot
8ecc8dd548 chore(atlasbot): automated image update 2026-02-04 02:42:05 +00:00
flux-bot
672a559e52 chore(atlasbot): automated image update 2026-02-04 02:30:04 +00:00
flux-bot
0dedf4083e chore(atlasbot): automated image update 2026-02-04 01:54:01 +00:00
flux-bot
bf8b99e365 chore(maintenance): automated image update 2026-02-04 01:51:59 +00:00
flux-bot
a33ad1c073 chore(atlasbot): automated image update 2026-02-04 01:27:59 +00:00
flux-bot
be90638fac chore(atlasbot): automated image update 2026-02-04 01:09:57 +00:00
flux-bot
3bc6d29f54 chore(atlasbot): automated image update 2026-02-04 00:55:56 +00:00
flux-bot
4e88c55e57 chore(atlasbot): automated image update 2026-02-04 00:42:56 +00:00
flux-bot
b8c94d5870 chore(atlasbot): automated image update 2026-02-04 00:37:55 +00:00
flux-bot
7f83d2f936 chore(atlasbot): automated image update 2026-02-04 00:34:55 +00:00
flux-bot
d42aa42d8a chore(atlasbot): automated image update 2026-02-04 00:19:53 +00:00
flux-bot
86f512fa1a chore(atlasbot): automated image update 2026-02-03 22:41:45 +00:00
flux-bot
16e2b19ea9 chore(atlasbot): automated image update 2026-02-03 22:06:41 +00:00
flux-bot
a1cb07c6d6 chore(atlasbot): automated image update 2026-02-03 20:18:32 +00:00
flux-bot
558d24ad6b chore(atlasbot): automated image update 2026-02-03 19:56:31 +00:00
flux-bot
160218a4ae chore(atlasbot): automated image update 2026-02-03 19:29:28 +00:00
flux-bot
2e361e620e chore(atlasbot): automated image update 2026-02-03 18:04:21 +00:00
flux-bot
fcd0ea9872 chore(atlasbot): automated image update 2026-02-03 17:53:20 +00:00
flux-bot
75826b0e5e chore(atlasbot): automated image update 2026-02-03 17:42:19 +00:00
flux-bot
71ddd03899 chore(atlasbot): automated image update 2026-02-03 17:34:18 +00:00
flux-bot
2d3a0b0184 chore(atlasbot): automated image update 2026-02-03 17:16:17 +00:00
flux-bot
c7fb848a62 chore(atlasbot): automated image update 2026-02-03 15:15:07 +00:00
flux-bot
c643c965b8 chore(atlasbot): automated image update 2026-02-03 15:05:06 +00:00
flux-bot
618be5ce01 chore(atlasbot): automated image update 2026-02-03 14:57:06 +00:00
flux-bot
ac049e6bb9 chore(atlasbot): automated image update 2026-02-03 14:51:05 +00:00
flux-bot
50108afc57 chore(atlasbot): automated image update 2026-02-03 14:40:04 +00:00
flux-bot
1f74a29445 chore(atlasbot): automated image update 2026-02-03 14:15:01 +00:00
flux-bot
08bc5f4b82 chore(atlasbot): automated image update 2026-02-03 14:07:01 +00:00
flux-bot
c208314506 chore(atlasbot): automated image update 2026-02-03 13:43:59 +00:00
flux-bot
763e5ff9e9 chore(atlasbot): automated image update 2026-02-03 13:22:57 +00:00
flux-bot
5ecb42cfef chore(atlasbot): automated image update 2026-02-03 13:08:56 +00:00
flux-bot
102d8e56ff chore(atlasbot): automated image update 2026-02-03 13:04:56 +00:00
flux-bot
ac96c5482f chore(atlasbot): automated image update 2026-02-03 12:56:55 +00:00
flux-bot
71aa60c696 chore(atlasbot): automated image update 2026-02-03 12:32:53 +00:00
flux-bot
d7582da21b chore(atlasbot): automated image update 2026-02-03 07:33:28 +00:00
flux-bot
4bf3773eb3 chore(atlasbot): automated image update 2026-02-03 06:31:22 +00:00
flux-bot
895ea49dc5 chore(atlasbot): automated image update 2026-02-03 06:07:21 +00:00
flux-bot
f355f6dd6a chore(atlasbot): automated image update 2026-02-03 04:57:14 +00:00
9f87e61f4a atlasbot: raise llm call caps 2026-02-03 01:55:21 -03:00
flux-bot
9a2890c45c chore(atlasbot): automated image update 2026-02-03 03:29:07 +00:00
flux-bot
ad74a45e76 chore(atlasbot): automated image update 2026-02-03 03:26:07 +00:00
fda4860d67 jenkins(atlasbot): set main branch 2026-02-02 23:12:13 -03:00
9f8a0f94d2 jenkins(atlasbot): use main branch 2026-02-02 23:10:42 -03:00
51d12791ca jenkins(atlasbot): track main branch 2026-02-02 22:25:56 -03:00
9fb36f23cd ci(atlasbot): add Jenkins job and image automation 2026-02-02 20:25:47 -03:00
flux-bot
1a2fe05808 chore(atlasbot): automated image update 2026-02-02 21:04:06 +00:00
flux-bot
0c5ec895ee chore(atlasbot): automated image update 2026-02-02 20:22:02 +00:00
7c87e177e9 vault: add default k8s audience 2026-02-02 17:15:35 -03:00
flux-bot
5e6d2a938f chore(atlasbot): automated image update 2026-02-02 20:08:02 +00:00
flux-bot
09070c2cc6 chore(atlasbot): automated image update 2026-02-02 19:53:00 +00:00
flux-bot
5dd30d8802 chore(atlasbot): automated image update 2026-02-02 18:13:52 +00:00
flux-bot
f302cb2448 chore(atlasbot): automated image update 2026-02-02 18:04:51 +00:00
c0a231fd91 atlasbot: bump image to 0.1.0-133 2026-02-02 14:58:38 -03:00
flux-bot
87f8a6d2c0 chore(atlasbot): automated image update 2026-02-02 17:56:53 +00:00
flux-bot
78a0867215 chore(atlasbot): automated image update 2026-02-02 17:56:48 +00:00
b0da9080c7 atlasbot: bump image to 0.1.0-132 2026-02-02 14:56:24 -03:00
8e3feeeaac atlasbot: bump image to 0.1.0-131 2026-02-02 14:54:36 -03:00
6f2ecdb364 atlasbot: bump image to 0.1.0-130 2026-02-02 14:48:34 -03:00
a5e168e55f atlasbot: bump image to 0.1.0-129 2026-02-02 14:41:22 -03:00
flux-bot
87dc1209b1 chore(atlasbot): automated image update 2026-02-02 17:32:49 +00:00
f86845053e atlasbot: disable queue for testing 2026-02-02 14:24:09 -03:00
flux-bot
c04c5ab048 chore(atlasbot): automated image update 2026-02-02 17:13:47 +00:00
flux-bot
ec3bdb7225 chore(atlasbot): automated image update 2026-02-02 16:55:46 +00:00
flux-bot
4b68809bb9 chore(atlasbot): automated image update 2026-02-02 16:45:45 +00:00
flux-bot
661bc6ac7d chore(atlasbot): automated image update 2026-02-02 16:38:44 +00:00
a9ee943344 atlasbot: bump image to 0.1.0-123 2026-02-02 13:30:34 -03:00
826df7d960 atlasbot: bump image to 0.1.0-122 2026-02-02 13:21:28 -03:00
flux-bot
8dfe124212 chore(atlasbot): automated image update 2026-02-02 16:10:42 +00:00
flux-bot
a3bef857f9 chore(atlasbot): automated image update 2026-02-02 15:57:41 +00:00
flux-bot
ed766d7a02 chore(atlasbot): automated image update 2026-02-02 15:47:40 +00:00
4295913056 atlasbot: bump image to 0.1.0-118 2026-02-02 12:39:24 -03:00
flux-bot
e3dfa2c0ea chore(atlasbot): automated image update 2026-02-02 15:20:38 +00:00
flux-bot
6bf8181677 chore(atlasbot): automated image update 2026-02-02 15:17:37 +00:00
d67f3d6fca jenkins: reload jcasc for soteria 2026-02-02 12:11:07 -03:00
flux-bot
41a0363fbc chore(atlasbot): automated image update 2026-02-02 15:09:37 +00:00
a609e230f2 atlasbot: bump image to 0.1.0-114 2026-02-02 12:05:58 -03:00
flux-bot
37342bfe4a chore(atlasbot): automated image update 2026-02-02 15:01:36 +00:00
a509354067 atlasbot: bump image to 0.1.0-112 2026-02-02 11:52:59 -03:00
flux-bot
fb14516674 chore(atlasbot): automated image update 2026-02-02 14:49:35 +00:00
60c80cc86f atlasbot: bump image to 0.1.0-110 2026-02-02 11:42:03 -03:00
flux-bot
7b8ea36554 chore(atlasbot): automated image update 2026-02-02 14:36:35 +00:00
49224375a0 atlasbot: bump image to 0.1.0-108 2026-02-02 11:23:53 -03:00
7d7ddd52dc atlasbot: bump image to 0.1.0-107 2026-02-02 11:14:54 -03:00
cd7043c7f1 jenkins: add soteria pipeline job 2026-02-02 11:01:22 -03:00
fb82a038e9 atlasbot: bump image to 0.1.0-106 2026-02-02 11:00:18 -03:00
93bcea5893 add ai harbor regcred sync 2026-02-02 10:08:46 -03:00
0ba8578416 bump atlasbot image 2026-02-02 10:05:06 -03:00
86475b8bdf track atlasbot knowledge index 2026-02-02 09:48:40 -03:00
f19eaf3b6b move atlasbot to ai namespace 2026-02-02 09:46:50 -03:00
flux-bot
e537180f1f chore(comms): automated image update 2026-02-02 06:03:16 +00:00
flux-bot
8298ed5c16 chore(comms): automated image update 2026-02-02 05:59:16 +00:00
flux-bot
152a28bd09 chore(comms): automated image update 2026-02-02 05:59:04 +00:00
7e02cccbe8 comms: bump atlasbot to 0.1.0-103 2026-02-02 02:58:44 -03:00
flux-bot
e60b1594c0 chore(comms): automated image update 2026-02-02 05:49:15 +00:00
flux-bot
87b2b37918 chore(comms): automated image update 2026-02-02 05:46:15 +00:00
flux-bot
a1249b3e00 chore(comms): automated image update 2026-02-02 05:45:54 +00:00
5000d1f76b comms: bump atlasbot to 0.1.0-101 2026-02-02 02:45:33 -03:00
flux-bot
584625b893 chore(comms): automated image update 2026-02-02 05:39:14 +00:00
95f4ecc4e0 comms: bump atlasbot to 0.1.0-99 2026-02-02 02:16:31 -03:00
240e04f9a2 comms: bump atlasbot to 0.1.0-98 2026-02-02 02:09:57 -03:00
449b8fed64 comms: bump atlasbot to 0.1.0-97 2026-02-02 02:03:50 -03:00
flux-bot
f6d655bb0c chore(comms): automated image update 2026-02-02 05:02:11 +00:00
4fa1b6e84c comms: bump atlasbot to 0.1.0-96 2026-02-02 01:57:58 -03:00
168efd78f7 comms: bump atlasbot to 0.1.0-95 2026-02-02 01:54:41 -03:00
e0bd11fa57 comms: bump atlasbot to 0.1.0-94 2026-02-02 01:45:52 -03:00
3f43299c92 comms: bump atlasbot to 0.1.0-93 2026-02-02 01:38:59 -03:00
645790f404 comms: bump atlasbot to 0.1.0-92 2026-02-01 18:46:01 -03:00
f11f6a4e62 comms: bump atlasbot to 0.1.0-91 2026-02-01 18:42:00 -03:00
flux-bot
c559253a31 chore(comms): automated image update 2026-02-01 21:37:32 +00:00
flux-bot
a3619ce215 chore(comms): automated image update 2026-02-01 21:33:32 +00:00
flux-bot
398fb7b797 chore(comms): automated image update 2026-02-01 21:25:31 +00:00
b30e6af95d comms: bump atlasbot to 0.1.0-87 2026-02-01 18:05:00 -03:00
flux-bot
4fd79b4708 chore(comms): automated image update 2026-02-01 20:55:29 +00:00
f23da3aea5 comms: bump atlasbot to 0.1.0-85 2026-02-01 17:48:24 -03:00
flux-bot
d951ae5061 chore(comms): automated image update 2026-02-01 20:43:28 +00:00
dfe9916e91 comms: bump atlasbot to 0.1.0-83 2026-02-01 14:45:58 -03:00
flux-bot
036c758547 chore(comms): automated image update 2026-02-01 17:39:12 +00:00
382a6e49ee comms: bump atlasbot to 0.1.0-81 2026-02-01 14:34:43 -03:00
93e7449509 comms: bump atlasbot to 0.1.0-80 2026-02-01 14:28:34 -03:00
58d1c168ff comms: bump atlasbot to 0.1.0-79 2026-02-01 14:07:57 -03:00
flux-bot
889400cdbf chore(comms): automated image update 2026-02-01 15:41:02 +00:00
flux-bot
e06066a327 chore(comms): automated image update 2026-02-01 15:36:01 +00:00
138f8c4407 comms: bump atlasbot image 2026-02-01 12:25:31 -03:00
33569aff99 vault: fix k8s auth env indent 2026-02-01 12:20:04 -03:00
3e2f56da7d vault: set kubernetes issuer 2026-02-01 12:18:57 -03:00
flux-bot
0914ba3509 chore(comms): automated image update 2026-02-01 15:01:58 +00:00
flux-bot
865a979424 chore(comms): automated image update 2026-02-01 14:55:58 +00:00
flux-bot
5dfc3ed259 chore(comms): automated image update 2026-02-01 14:55:52 +00:00
b479364017 comms: bump atlasbot image 2026-02-01 11:55:26 -03:00
flux-bot
00d8f852a3 chore(comms): automated image update 2026-02-01 14:47:57 +00:00
flux-bot
2d7f744284 chore(comms): automated image update 2026-02-01 14:18:55 +00:00
5f1b1a6cd0 vault: set k8s auth audiences 2026-02-01 11:17:02 -03:00
flux-bot
e966961dbe chore(comms): automated image update 2026-02-01 13:58:53 +00:00
7ffb0aba5d atlasbot: bump to 0.1.0-70 2026-02-01 10:37:29 -03:00
flux-bot
e80a439725 chore(comms): automated image update 2026-02-01 08:40:26 +00:00
flux-bot
8a22825796 chore(comms): automated image update 2026-02-01 08:40:09 +00:00
1fabd4ce2f atlasbot: bump to 0.1.0-69 2026-02-01 05:39:54 -03:00
759ac5ef90 comms: bump atlasbot image 2026-02-01 05:31:07 -03:00
flux-bot
bc971cce92 chore(comms): automated image update 2026-02-01 08:23:24 +00:00
flux-bot
069f6b4983 chore(comms): automated image update 2026-02-01 08:18:24 +00:00
64cfd5180d comms: bump atlasbot image 2026-02-01 05:12:59 -03:00
flux-bot
8a087fb16d chore(comms): automated image update 2026-02-01 08:10:23 +00:00
flux-bot
652c3a28a3 chore(comms): automated image update 2026-02-01 07:55:22 +00:00
flux-bot
141c54ccf3 chore(comms): automated image update 2026-02-01 07:49:21 +00:00
flux-bot
0f8529c7c5 chore(comms): automated image update 2026-02-01 07:46:21 +00:00
flux-bot
dafba36768 chore(comms): automated image update 2026-02-01 07:38:20 +00:00
4d5e9552e3 comms: bump atlasbot to 0.1.0-59 2026-02-01 04:32:01 -03:00
ddf1d41fd3 comms: bump atlasbot to 0.1.0-58 2026-02-01 04:25:12 -03:00
flux-bot
49e630f7fd chore(comms): automated image update 2026-02-01 07:17:18 +00:00
flux-bot
b7a81d28d1 chore(comms): automated image update 2026-02-01 06:39:16 +00:00
109c00bc3c comms: bump atlasbot to 0.1.0-55 2026-02-01 02:08:54 -03:00
flux-bot
c9ad055b4c chore(comms): automated image update 2026-02-01 05:07:08 +00:00
10498c659b comms: bump atlasbot to 0.1.0-54 2026-02-01 01:51:26 -03:00
flux-bot
978bd8e595 chore(comms): automated image update 2026-02-01 04:51:06 +00:00
259552ac28 comms: bump atlasbot to 0.1.0-53 2026-02-01 01:39:09 -03:00
flux-bot
7f2ded5244 chore(comms): automated image update 2026-02-01 04:39:05 +00:00
e4c370b983 comms: bump atlasbot to 0.1.0-52 2026-02-01 01:29:30 -03:00
flux-bot
7dfc98b6d6 chore(comms): automated image update 2026-02-01 04:29:04 +00:00
cb60c64bce comms: bump atlasbot to 0.1.0-51 2026-02-01 01:15:18 -03:00
flux-bot
091f095893 chore(comms): automated image update 2026-02-01 04:15:03 +00:00
5b389d12df comms(atlasbot): bump image to 0.1.0-50 2026-01-31 22:30:04 -03:00
flux-bot
ae88bc8484 chore(comms): automated image update 2026-02-01 01:28:49 +00:00
529576e082 comms: bump atlasbot image 2026-01-31 21:40:11 -03:00
flux-bot
a7ffaa3213 chore(maintenance): automated image update 2026-02-01 00:39:49 +00:00
flux-bot
e478f1c74d chore(comms): automated image update 2026-02-01 00:39:45 +00:00
2480b6cecc comms: disable atlasbot queue for tests 2026-01-31 18:21:39 -03:00
bbe27f963d comms: bump atlasbot to 0.1.0-48 2026-01-31 18:14:55 -03:00
flux-bot
c5da854cef chore(comms): automated image update 2026-01-31 21:14:27 +00:00
0319707fff atlasbot: make node counts explicit 2026-01-31 16:44:50 -03:00
4f8d8f1f25 atlasbot: prioritize high-priority subquestions 2026-01-31 16:38:54 -03:00
5448ff3f55 atlasbot: expand chunk summaries 2026-01-31 16:35:02 -03:00
b6c2d1416e atlasbot: enable debug pipeline logging 2026-01-31 16:30:05 -03:00
flux-bot
152e1d88f4 chore(comms): automated image update 2026-01-31 19:29:18 +00:00
86e9dc289f atlasbot: bump to 0.1.0-43 2026-01-31 14:24:13 -03:00
flux-bot
c4b7198c46 chore(comms): automated image update 2026-01-31 17:21:08 +00:00
f8a12be2ec atlasbot: bump image to 0.1.0-42 2026-01-31 14:15:41 -03:00
flux-bot
c9ec5126cd chore(comms): automated image update 2026-01-31 17:15:07 +00:00
flux-bot
c66db7c18f chore(maintenance): automated image update 2026-01-31 16:42:06 +00:00
flux-bot
de47ab76a5 chore(maintenance): automated image update 2026-01-31 16:39:06 +00:00
c788512d59 atlasbot: bump image to 0.1.0-41 2026-01-31 13:26:44 -03:00
flux-bot
ae25ccb6f2 chore(comms): automated image update 2026-01-31 16:25:03 +00:00
flux-bot
e27f4cfc68 chore(comms): automated image update 2026-01-31 11:08:36 +00:00
50e06b4a13 atlasbot: bump image to 0.1.0-40 2026-01-31 08:08:21 -03:00
934d6e7a3b comms: fix atlasbot image indentation 2026-01-31 07:17:58 -03:00
flux-bot
25654a731e chore(comms): automated image update 2026-01-31 10:12:32 +00:00
4aecadb3de atlasbot: bump image to 0.1.0-39 2026-01-31 07:11:56 -03:00
3b79a82c71 atlasbot: bump image to 0.1.0-38 2026-01-31 06:18:58 -03:00
flux-bot
04b263dc2d chore(comms): automated image update 2026-01-31 09:18:28 +00:00
93841d9de7 maintenance: add soteria service 2026-01-31 03:35:39 -03:00
bb294c6d21 atlasbot: bump image to 0.1.0-37 2026-01-31 03:20:44 -03:00
flux-bot
64962f8863 chore(comms): automated image update 2026-01-31 06:20:12 +00:00
bcb4c05b14 ariadne: add alertmanager url 2026-01-30 21:57:05 -03:00
flux-bot
d00a09fb58 chore(maintenance): automated image update 2026-01-31 00:54:47 +00:00
flux-bot
a22ff047f7 chore(maintenance): automated image update 2026-01-31 00:40:46 +00:00
flux-bot
fef5d7d26a chore(maintenance): automated image update 2026-01-30 23:54:41 +00:00
fa60fa124c comms: suspend mas-local-users-ensure 2026-01-30 17:46:46 -03:00
30c1192978 comms: bump mas-local-users-ensure job 2026-01-30 17:44:42 -03:00
644be2c575 comms: bump comms-secrets-ensure job 2026-01-30 17:42:28 -03:00
29d1bf9f4e comms: run mas-local-users-ensure job (retry) 2026-01-30 17:37:42 -03:00
9bdab331b6 comms: suspend mas-local-users-ensure job 2026-01-30 17:33:55 -03:00
8f49ac2d63 comms: run mas-local-users-ensure job 2026-01-30 17:29:29 -03:00
flux-bot
43b9cd27ed chore(maintenance): automated image update 2026-01-30 20:18:24 +00:00
580ac4950b comms: add atlas-genius bot 2026-01-30 17:17:59 -03:00
flux-bot
d677e83423 chore(comms): automated image update 2026-01-30 20:07:20 +00:00
flux-bot
bff55a6dc7 chore(bstein-dev-home): automated image update 2026-01-30 20:05:30 +00:00
flux-bot
0465658ba7 chore(bstein-dev-home): automated image update 2026-01-30 20:02:30 +00:00
flux-bot
3e484ba726 chore(comms): automated image update 2026-01-30 19:53:19 +00:00
flux-bot
088bb3b435 chore(comms): automated image update 2026-01-30 19:42:22 +00:00
flux-bot
e81bad9d47 chore(maintenance): automated image update 2026-01-30 13:21:48 +00:00
3f11a065a3 atlasbot: support quick/smart Matrix accounts 2026-01-30 10:21:07 -03:00
flux-bot
ec6375f31d chore(maintenance): automated image update 2026-01-30 05:19:07 +00:00
flux-bot
5a8360ed97 chore(maintenance): automated image update 2026-01-30 03:15:56 +00:00
flux-bot
9e75f82d43 chore(comms): automated image update 2026-01-29 23:54:42 +00:00
flux-bot
7ac26eb0dd chore(maintenance): automated image update 2026-01-29 19:56:19 +00:00
00d2f6a61f comms: bump atlasbot to 0.1.0-32 2026-01-29 16:51:43 -03:00
flux-bot
687ca2c22d chore(comms): automated image update 2026-01-29 19:50:22 +00:00
52281ca2ec comms: bump atlasbot to 0.1.0-31 2026-01-29 16:09:15 -03:00
flux-bot
8850e9fdf1 chore(comms): automated image update 2026-01-29 19:08:18 +00:00
a253993451 comms: bump atlasbot to 0.1.0-30 2026-01-29 14:56:59 -03:00
flux-bot
aeff2bbe73 chore(comms): automated image update 2026-01-29 17:55:12 +00:00
39616b2435 comms: bump atlasbot 0.1.0-29 2026-01-29 14:18:51 -03:00
flux-bot
b3d8674499 chore(maintenance): automated image update 2026-01-29 16:43:04 +00:00
3ca0fb352d sso: suspend execute-actions email test job 2026-01-29 13:41:41 -03:00
f7ea7d57e9 sso: send execute-actions email to robotuser 2026-01-29 13:40:45 -03:00
flux-bot
a418844f61 chore(maintenance): automated image update 2026-01-29 16:35:03 +00:00
96d914d02c comms: bump atlasbot to 0.1.0-28 2026-01-29 13:33:39 -03:00
e6c031829a sso: suspend keycloak oneoff jobs 2026-01-29 13:30:10 -03:00
ebfb19c34e sso: rerun execute-actions email test 2026-01-29 13:28:32 -03:00
4fedec3999 sso: set keycloak smtp to postmark 2026-01-29 13:27:28 -03:00
55f78f2eb7 sso: rerun execute-actions email test 2026-01-29 13:23:59 -03:00
ab5ef933d8 sso: run keycloak execute-actions email test 2026-01-29 13:21:40 -03:00
3e23109229 sso: suspend realm settings job 2026-01-29 13:20:11 -03:00
d18c06ad31 sso: rerun keycloak realm settings 2026-01-29 13:10:31 -03:00
292a6b7e04 monitoring: stabilize alert queries 2026-01-29 13:07:55 -03:00
flux-bot
d7fd5682f3 chore(maintenance): automated image update 2026-01-29 16:07:01 +00:00
bedab04b22 atlasbot: bump to 0.1.0-27 2026-01-29 13:06:37 -03:00
6d7a32ce11 atlasbot: align to installed qwen model 2026-01-29 10:25:57 -03:00
87ded58aca atlasbot: align models and bump image 2026-01-29 10:17:38 -03:00
flux-bot
5f30ab73bf chore(comms): automated image update 2026-01-29 13:16:50 +00:00
flux-bot
3f2d2e5fdb chore(maintenance): automated image update 2026-01-29 13:16:46 +00:00
flux-bot
f55e9a6043 chore(comms): automated image update 2026-01-29 12:23:45 +00:00
flux-bot
7de15db57a chore(comms): automated image update 2026-01-29 11:47:42 +00:00
flux-bot
265f809f8f chore(maintenance): automated image update 2026-01-29 11:43:38 +00:00
flux-bot
e4d19fc5b4 chore(comms): automated image update 2026-01-29 11:42:41 +00:00
flux-bot
d10eace338 chore(maintenance): automated image update 2026-01-29 10:45:37 +00:00
78afc97db2 atlasbot: bump image and allow longhorn read 2026-01-29 07:45:24 -03:00
flux-bot
3c0d4d0f4f chore(comms): automated image update 2026-01-29 10:44:37 +00:00
flux-bot
d73d6d7c01 chore(comms): automated image update 2026-01-29 09:21:30 +00:00
flux-bot
af02ee7abf chore(comms): automated image update 2026-01-29 09:16:59 +00:00
630a596cb6 atlasbot: bump image tag 2026-01-29 06:16:43 -03:00
flux-bot
d2729138b6 chore(maintenance): automated image update 2026-01-29 09:12:26 +00:00
a6fbcc8669 maintenance(ariadne): allow apps/events, bump image tag 2026-01-29 06:09:36 -03:00
flux-bot
d91d632496 chore(maintenance): automated image update 2026-01-29 09:01:41 +00:00
flux-bot
3a9949a24d chore(comms): automated image update 2026-01-29 08:01:25 +00:00
b045506516 vault: allow kubernetes auth login 2026-01-29 02:22:51 -03:00
flux-bot
3f24de03d1 chore(maintenance): automated image update 2026-01-29 04:58:20 +00:00
flux-bot
a3ffcb2ea1 chore(comms): automated image update 2026-01-29 04:58:10 +00:00
flux-bot
314a922109 chore(comms): automated image update 2026-01-29 04:56:21 +00:00
flux-bot
2ed4762fab chore(maintenance): automated image update 2026-01-29 04:56:05 +00:00
1c6d572559 images: bump ariadne and atlasbot 2026-01-29 01:55:07 -03:00
flux-bot
58cc15a7e0 chore(comms): automated image update 2026-01-29 01:35:52 +00:00
flux-bot
3da28531fd chore(maintenance): automated image update 2026-01-29 01:35:03 +00:00
flux-bot
58f818cebc chore(maintenance): automated image update 2026-01-28 23:47:54 +00:00
flux-bot
cff7ec922e chore(comms): automated image update 2026-01-28 23:46:43 +00:00
flux-bot
a49f0580da chore(maintenance): automated image update 2026-01-28 23:43:54 +00:00
flux-bot
10d4f015b2 chore(maintenance): automated image update 2026-01-28 23:36:54 +00:00
flux-bot
669849b883 chore(maintenance): automated image update 2026-01-28 23:31:53 +00:00
flux-bot
9ce9470677 chore(comms): automated image update 2026-01-28 22:59:41 +00:00
c3555d59f7 monitoring: fix GPU share attribution 2026-01-28 19:08:53 -03:00
28af553498 monitoring: de-dupe ariadne schedule alert 2026-01-28 18:45:15 -03:00
d42385de3e comms: suspend synapse admin ensure job 2026-01-28 18:39:28 -03:00
6104035474 maintenance: restart ariadne after synapse token update 2026-01-28 18:37:49 -03:00
dabf043ce6 comms: force admin token to use othrys-seeder 2026-01-28 18:35:28 -03:00
9b8ef436c8 comms: fix vault_put indentation 2026-01-28 18:31:48 -03:00
8cf24a6c96 comms: source admin token from seeder access tokens 2026-01-28 18:29:49 -03:00
2797464b45 comms: mint synapse admin token with syt_ prefix 2026-01-28 18:20:37 -03:00
320cf901ba comms: rerun synapse admin ensure with device 2026-01-28 18:17:24 -03:00
5bb0fc126e comms: ensure synapse device for admin token 2026-01-28 18:10:55 -03:00
1b8271ed61 maintenance: restart ariadne after synapse token 2026-01-28 17:59:25 -03:00
fab030e9c0 comms: rotate invalid synapse admin token 2026-01-28 17:57:39 -03:00
be6b65cedb comms: rerun synapse admin ensure job 2026-01-28 17:54:53 -03:00
cbed39bd64 comms: run synapse admin ensure job 2026-01-28 17:50:01 -03:00
445622e936 comms: use bundled synapse admin ensure image 2026-01-28 17:47:58 -03:00
17e28d2891 maintenance: restart ariadne to reload secrets 2026-01-28 17:31:25 -03:00
8325827c41 comms: suspend synapse admin ensure job 2026-01-28 17:29:07 -03:00
7c7ed38ead comms: fix synapse admin ensure vault login 2026-01-28 17:27:39 -03:00
5d2fb32ff8 comms: rebuild synapse admin ensure job 2026-01-28 17:25:34 -03:00
flux-bot
b62a5ba3fb chore(maintenance): automated image update 2026-01-28 20:21:37 +00:00
359445ab43 comms: run synapse admin ensure job 2026-01-28 17:19:55 -03:00
4d1382cfc9 maintenance: track ariadne latest image 2026-01-28 14:04:58 -03:00
b66c7de5fd monitoring: avoid ariadne alert title conflict 2026-01-28 14:02:12 -03:00
3d4e5bdde1 monitoring: disable legacy cron alert 2026-01-28 13:58:28 -03:00
f37baf2447 monitoring: restart grafana to reload alerts 2026-01-28 13:53:33 -03:00
ad3d8d75c9 monitoring: reuse maint-cron uid for ariadne alert 2026-01-28 13:52:12 -03:00
4ecfdcef7c monitoring: restart grafana for ariadne alerts 2026-01-28 13:49:41 -03:00
flux-bot
63ae3e3f6f chore(comms): automated image update 2026-01-28 16:49:09 +00:00
eab2ce50b1 monitoring: alert on ariadne schedules 2026-01-28 13:47:54 -03:00
flux-bot
523db13be0 chore(maintenance): automated image update 2026-01-28 16:47:19 +00:00
6a3f8cffe1 comms: fix MAS job indentation 2026-01-28 13:25:51 -03:00
80a0f424cd comms: tolerate MAS login rate limits 2026-01-28 13:23:25 -03:00
8e9d85ccd7 comms: stop seeding atlas bots in synapse job 2026-01-28 13:18:44 -03:00
85abd589d4 comms: inject quick/smart bot creds for MAS job 2026-01-28 13:12:02 -03:00
flux-bot
bfbd707293 chore(bstein-dev-home): automated image update 2026-01-28 16:07:02 +00:00
flux-bot
526a895775 chore(bstein-dev-home): automated image update 2026-01-28 16:06:02 +00:00
38e1eba112 comms: add atlas quick/smart bots 2026-01-28 13:01:09 -03:00
flux-bot
f9e6cabe6d chore(comms): automated image update 2026-01-28 15:59:05 +00:00
36bb695c15 monitoring: fix grafana pod annotation indent 2026-01-28 12:37:42 -03:00
flux-bot
b449b65244 chore(comms): automated image update 2026-01-28 15:35:02 +00:00
1a9651914e monitoring: restart grafana after alert fix 2026-01-28 12:32:56 -03:00
flux-bot
9e5be20983 chore(comms): automated image update 2026-01-28 15:32:23 +00:00
d55bc98bbe monitoring: fix postmark alert metrics 2026-01-28 12:31:33 -03:00
flux-bot
46d677f5e7 chore(comms): automated image update 2026-01-28 15:22:49 +00:00
ef63b0f9f3 feat: add nats platform kustomization 2026-01-28 12:15:39 -03:00
111ae84255 chore: move flux sync to feature/atlasbot 2026-01-28 12:12:23 -03:00
d78a3c2550 comms: allow atlasbot to pull harbor images 2026-01-28 11:54:11 -03:00
fb89158622 atlasbot: move to service image and add nats queue infra 2026-01-28 11:52:37 -03:00
263 changed files with 10812 additions and 33771 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
!README.md
!knowledge/**/*.md
!services/comms/knowledge/**/*.md
!services/atlasbot/knowledge/**/*.md
__pycache__/
*.py[cod]
.pytest_cache

374
Jenkinsfile vendored
View File

@ -11,47 +11,9 @@ spec:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-06
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -61,21 +23,6 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -85,295 +32,12 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Prepare local quality evidence') {
stage('Glue tests') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
}
}
stage('Run quality gate') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile jenkins --build-dir build
quality_gate_rc=$?
set -e
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
'''
}
}
stage('Publish test metrics') {
steps {
sh '''
set -eu
export JUNIT_GLOB='build/junit-*.xml'
export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
python3 ci/scripts/publish_test_metrics.py
'''
}
}
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
@ -381,7 +45,7 @@ PY
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
@ -400,20 +64,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
@ -424,18 +74,4 @@ PY
}
}
}
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -1,29 +1,3 @@
# titan-iac
Flux-managed Kubernetes desired-state config for `bstein.dev`.
Canonical source URL:
- `ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
## Scope
This repo contains cluster configuration consumed by Flux:
- platform/infrastructure manifests
- service manifests and kustomizations
- operational scripts for render/reconcile workflows
This repo is **not** the Ananke application source repo.
Ananke lives in `bstein/ananke` and orchestrates host-side shutdown/startup behavior around this desired state.
## Validation workflow
```bash
kustomize build services/<app>
kubectl apply --server-side --dry-run=client -k services/<app>
flux reconcile kustomization <name> --namespace flux-system --with-source
```
## Apply model
Use Git + Flux as the source of truth.
Avoid manual in-cluster edits for durable changes.
Flux-managed Kubernetes cluster for bstein.dev services.

View File

@ -10,47 +10,9 @@ spec:
hardware: rpi5
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-06
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -60,21 +22,6 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -84,295 +31,12 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Prepare local quality evidence') {
stage('Glue tests') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
}
}
stage('Run quality gate') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile jenkins --build-dir build
quality_gate_rc=$?
set -e
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
'''
}
}
stage('Publish test metrics') {
steps {
sh '''
set -eu
export JUNIT_GLOB='build/junit-*.xml'
export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
python3 ci/scripts/publish_test_metrics.py
'''
}
}
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
'''
sh 'pytest -q ci/tests/glue'
}
}
stage('Resolve Flux branch') {
@ -399,20 +63,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"
@ -423,18 +73,4 @@ PY
}
}
}
post {
always {
script {
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
try {
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
} catch (Throwable err) {
echo "junit step unavailable: ${err.class.simpleName}"
}
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -1,7 +1,4 @@
pytest==8.3.4
pytest-cov==6.0.0
coverage==7.6.10
kubernetes==30.1.0
PyYAML==6.0.2
requests==2.32.3
ruff==0.8.4

View File

@ -1,352 +0,0 @@
#!/usr/bin/env python3
"""Publish titan-iac quality-gate results to Pushgateway."""
from __future__ import annotations
import json
import os
from glob import glob
from pathlib import Path
import sys
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from ci.scripts import publish_test_metrics_quality as _quality_helpers
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
_build_check_statuses = _quality_helpers._build_check_statuses
_combine_statuses = _quality_helpers._combine_statuses
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
_load_optional_json = _quality_helpers._load_optional_json
_normalize_result_status = _quality_helpers._normalize_result_status
def _escape_label(value: str) -> str:
"""Escape a Prometheus label value without changing its content."""
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
def _label_str(labels: dict[str, str]) -> str:
"""Render a stable Prometheus label set from a mapping."""
parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
return "{" + ",".join(parts) + "}" if parts else ""
def _read_text(url: str) -> str:
"""Fetch a plain-text response body from the given URL."""
with urllib.request.urlopen(url, timeout=10) as response:
return response.read().decode("utf-8")
def _post_text(url: str, payload: str) -> None:
"""PUT a plain-text payload and fail on any 4xx/5xx response."""
request = urllib.request.Request(
url,
data=payload.encode("utf-8"),
method="PUT",
headers={"Content-Type": "text/plain"},
)
with urllib.request.urlopen(request, timeout=10) as response:
if response.status >= 400:
raise RuntimeError(f"push failed with status={response.status}")
def _parse_junit(path: str) -> dict[str, int]:
"""Parse a JUnit XML file into aggregate test counters."""
if not os.path.exists(path):
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
tree = ET.parse(path)
root = tree.getroot()
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for key in totals:
raw_value = suite.attrib.get(key, "0")
try:
totals[key] += int(float(raw_value))
except ValueError:
totals[key] += 0
return totals
def _collect_junit_totals(pattern: str) -> dict[str, int]:
"""Sum JUnit counters across every XML file matching the pattern."""
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
for path in sorted(glob(pattern)):
parsed = _parse_junit(path)
for key in totals:
totals[key] += parsed[key]
return totals
def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
"""Collect individual JUnit test-case statuses for flaky-test trend panels."""
cases: list[tuple[str, str]] = []
for path in sorted(glob(pattern)):
if not os.path.exists(path):
continue
root = ET.parse(path).getroot()
suites: list[ET.Element]
if root.tag == "testsuite":
suites = [root]
elif root.tag == "testsuites":
suites = [elem for elem in root if elem.tag == "testsuite"]
else:
suites = []
for suite in suites:
for test_case in suite.findall("testcase"):
case_name = test_case.attrib.get("name", "").strip()
class_name = test_case.attrib.get("classname", "").strip()
if not case_name:
continue
full_name = f"{class_name}.{case_name}" if class_name else case_name
status = "passed"
if test_case.find("failure") is not None or test_case.find("error") is not None:
status = "failed"
elif test_case.find("skipped") is not None:
status = "skipped"
cases.append((full_name, status))
return cases
def _read_exit_code(path: str) -> int:
"""Read the quality-gate exit code, defaulting to failure if missing."""
try:
with open(path, "r", encoding="utf-8") as handle:
return int(handle.read().strip())
except (FileNotFoundError, ValueError):
return 1
def _load_summary(path: str) -> dict:
"""Load the JSON quality-gate summary, returning an empty mapping on error."""
try:
with open(path, "r", encoding="utf-8") as handle:
return json.load(handle)
except (FileNotFoundError, json.JSONDecodeError):
return {}
def _summary_float(summary: dict, key: str) -> float:
"""Extract a float-like value from the summary, defaulting to 0.0."""
value = summary.get(key)
if isinstance(value, (int, float)):
return float(value)
return 0.0
def _summary_int(summary: dict, key: str) -> int:
"""Extract an int-like value from the summary, defaulting to 0."""
value = summary.get(key)
if isinstance(value, int):
return value
if isinstance(value, float):
return int(value)
return 0
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
"""Return the current counter value for a labeled metric if present."""
text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
for line in text.splitlines():
if not line.startswith(metric + "{"):
continue
if any(f'{key}="{value}"' not in line for key, value in labels.items()):
continue
parts = line.split()
if len(parts) < 2:
continue
try:
return float(parts[1])
except ValueError:
return 0.0
return 0.0
def _build_payload(
suite: str,
status: str,
tests: dict[str, int],
test_cases: list[tuple[str, str]],
ok_count: int,
failed_count: int,
branch: str,
build_number: str,
jenkins_job: str,
summary: dict | None = None,
workspace_line_coverage_percent: float = 0.0,
source_lines_over_500: int = 0,
check_statuses: dict[str, str] | None = None,
) -> str:
"""Build the Pushgateway payload for the current suite run."""
passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
build_labels = _label_str(
{
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
)
test_case_base_labels = {
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
lines = [
"# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count}',
"# TYPE titan_iac_quality_gate_tests_total gauge",
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="failed"}} {tests["failures"]}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="error"}} {tests["errors"]}',
f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {tests["skipped"]}',
"# TYPE titan_iac_quality_gate_run_status gauge",
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
"# TYPE platform_quality_gate_build_info gauge",
f"platform_quality_gate_build_info{build_labels} 1",
"# TYPE titan_iac_quality_gate_build_info gauge",
f"titan_iac_quality_gate_build_info{build_labels} 1",
"# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}',
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
]
if check_statuses:
lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
for check_name in CANONICAL_CHECKS:
check_status = check_statuses.get(check_name, "not_applicable")
lines.append(
f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
)
lines.append("# TYPE platform_quality_gate_test_case_result gauge")
if test_cases:
for test_name, test_status in test_cases:
labels = {
**test_case_base_labels,
"test": test_name,
"status": test_status,
}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
else:
labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
)
return "\n".join(lines) + "\n"
def main() -> int:
    """Publish the quality-gate metrics and print a compact run summary.

    Reads run artifacts (JUnit XML, gate exit code, quality summary JSON),
    derives per-check statuses, pushes a Prometheus exposition payload to the
    Pushgateway, and prints a one-line JSON digest to stdout.

    Returns:
        0 always; metric publication failures surface as exceptions.
    """
    # Suite/job identity and Pushgateway endpoint, all overridable via env.
    suite = os.getenv("SUITE_NAME", "titan_iac")
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
    job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
    # Input artifact locations; the secondary env fallbacks keep older
    # pipeline configurations working.
    junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
    exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
    summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
    branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
    # Jenkins may report remote-tracking names like "origin/main"; strip it.
    if branch.startswith("origin/"):
        branch = branch[len("origin/") :]
    build_number = os.getenv("BUILD_NUMBER", "")
    jenkins_job = os.getenv("JOB_NAME", "titan-iac")
    tests = _collect_junit_totals(junit_glob)
    test_cases = _collect_junit_cases(junit_glob)
    # The gate's recorded exit code decides the overall run status.
    exit_code = _read_exit_code(exit_code_path)
    status = "ok" if exit_code == 0 else "failed"
    summary = _load_summary(summary_path)
    workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
    if workspace_line_coverage_percent <= 0:
        # Summary lacked (or zeroed) the value: fall back to the coverage XML.
        workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
    source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
    if source_lines_over_500 <= 0:
        source_lines_over_500 = _infer_source_lines_over_500(summary)
    sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
    supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
    supply_chain_required = os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in {"1", "true", "yes", "on"}
    check_statuses = _build_check_statuses(
        summary=summary,
        tests=tests,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        sonarqube_report=sonarqube_report,
        supply_chain_report=supply_chain_report,
        supply_chain_required=supply_chain_required,
    )
    # Emulate monotonically increasing run counters on the Pushgateway by
    # fetching the previously pushed totals and re-pushing them incremented.
    ok_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "ok"},
        )
    )
    failed_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "failed"},
        )
    )
    if status == "ok":
        ok_count += 1
    else:
        failed_count += 1
    payload = _build_payload(
        suite=suite,
        status=status,
        tests=tests,
        test_cases=test_cases,
        ok_count=ok_count,
        failed_count=failed_count,
        branch=branch,
        build_number=build_number,
        jenkins_job=jenkins_job,
        summary=summary,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        check_statuses=check_statuses,
    )
    # Group the push under job/suite so repeated runs replace prior series.
    push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
    _post_text(push_url, payload)
    # NOTE(review): `summary` is rebound here from the loaded quality summary
    # to the printed run digest; the original dict is no longer needed below.
    summary = {
        "suite": suite,
        "status": status,
        "tests_total": tests["tests"],
        "tests_failed": tests["failures"],
        "tests_error": tests["errors"],
        "tests_skipped": tests["skipped"],
        "ok_count": ok_count,
        "failed_count": failed_count,
        "checks_recorded": len(check_statuses),
        "workspace_line_coverage_percent": workspace_line_coverage_percent,
        "source_lines_over_500": source_lines_over_500,
    }
    print(json.dumps(summary, sort_keys=True))
    return 0
# Script entrypoint: propagate main()'s return code as the process exit code.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -1,200 +0,0 @@
#!/usr/bin/env python3
"""Quality/status helpers for publish_test_metrics."""
from __future__ import annotations
import json
from pathlib import Path
import xml.etree.ElementTree as ET
# Status vocabularies: free-form check status text is bucketed into one of
# three canonical results ("ok", "not_applicable", "failed") by
# _normalize_result_status using these case-insensitive lookup sets.
SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
# Fixed check names every quality-gate run reports on, in dashboard order.
CANONICAL_CHECKS = [
    "tests",
    "coverage",
    "loc",
    "docs_naming",
    "gate_glue",
    "sonarqube",
    "supply_chain",
]
def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
"""Infer workspace line coverage from quality summary coverage XML metadata."""
results = summary.get("results", []) if isinstance(summary, dict) else []
coverage_xml = default_xml
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() != "coverage":
continue
candidate = str(result.get("coverage_xml") or "").strip()
if candidate:
coverage_xml = candidate
break
xml_path = Path(coverage_xml)
if not xml_path.exists():
return 0.0
try:
root = ET.parse(xml_path).getroot()
line_rate = root.attrib.get("line-rate")
if line_rate is None:
return 0.0
return float(line_rate) * 100.0
except (ET.ParseError, OSError, ValueError):
return 0.0
def _infer_source_lines_over_500(summary: dict) -> int:
"""Infer over-limit source file count from hygiene issue payloads."""
results = summary.get("results", []) if isinstance(summary, dict) else []
for result in results:
if not isinstance(result, dict):
continue
if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
continue
issues = result.get("issues")
if not isinstance(issues, list):
continue
return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
return 0
def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Translate free-form status text into a canonical result bucket.

    Matching is case-insensitive and whitespace-tolerant; empty or
    unrecognized text falls back to ``default``.
    """
    if not value:
        return default
    token = value.strip().lower()
    for bucket, members in (
        ("ok", SUCCESS_STATUSES),
        ("not_applicable", NOT_APPLICABLE_STATUSES),
        ("failed", FAILED_STATUSES),
    ):
        if token in members:
            return bucket
    return default
def _load_optional_json(path: str | None) -> dict:
"""Load an optional JSON report file, returning an empty object when absent."""
if not path:
return {}
candidate = Path(path)
if not candidate.exists():
return {}
try:
return json.loads(candidate.read_text(encoding="utf-8"))
except json.JSONDecodeError:
return {}
def _combine_statuses(statuses: list[str]) -> str:
"""Roll up many check statuses into one canonical result."""
if not statuses:
return "not_applicable"
if any(status == "failed" for status in statuses):
return "failed"
if all(status == "not_applicable" for status in statuses):
return "not_applicable"
if all(status in {"ok", "not_applicable"} for status in statuses):
return "ok"
return "failed"
def _infer_sonarqube_status(report: dict) -> str:
    """Infer canonical SonarQube check status from its JSON report payload.

    Checks the common report shapes in order (projectStatus, qualityGate,
    top-level status); an empty report is not applicable.
    """
    if not report:
        return "not_applicable"
    # `or` short-circuits so later shapes are only probed when needed.
    raw = (
        report.get("projectStatus", {}).get("status")
        or report.get("qualityGate", {}).get("status")
        or report.get("status")
    )
    text = None if raw is None else str(raw)
    return _normalize_result_status(text, default="failed")
def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Infer canonical supply-chain status from IronBank/artifact report payload.

    A missing report or status fails only when the check is required; an
    explicit boolean ``compliant`` field wins over any textual status.
    """
    absent = "failed" if required else "not_applicable"
    if not report:
        return absent
    verdict = report.get("compliant")
    if verdict is True:
        return "ok"
    if verdict is False:
        return "failed"
    raw_status = report.get("status")
    if raw_status is None:
        return absent
    outcome = _normalize_result_status(str(raw_status), default="failed")
    # "Not applicable" is not an acceptable answer for a mandatory check.
    if required and outcome == "not_applicable":
        return "failed"
    return outcome
def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Generate the canonical quality-check status map for dashboarding.

    Each CANONICAL_CHECKS entry gets a status taken from the summary when a
    matching result is present, otherwise inferred from the other inputs
    (JUnit totals, coverage percent, over-length file count, SonarQube and
    supply-chain reports).
    """
    raw_results = summary.get("results", []) if isinstance(summary, dict) else []
    # Index the summary's named results by lowercase name for the lookups below.
    status_by_name: dict[str, str] = {}
    for result in raw_results:
        if not isinstance(result, dict):
            continue
        check_name = str(result.get("name") or "").strip().lower()
        if not check_name:
            continue
        status_by_name[check_name] = _normalize_result_status(result.get("status"), default="failed")
    # tests: explicit entry > roll-up of per-suite entries > JUnit totals.
    tests_status = status_by_name.get("tests")
    if not tests_status:
        candidate_keys = ["unit", "integration", "e2e", "pytest", "test", "tests"]
        candidates = [status_by_name[key] for key in candidate_keys if key in status_by_name]
        if candidates:
            tests_status = _combine_statuses(candidates)
        elif tests["tests"] > 0:
            tests_status = "ok" if (tests["failures"] + tests["errors"]) == 0 else "failed"
        else:
            tests_status = "not_applicable"
    # coverage: explicit entry > 95% line-coverage threshold on the measured value.
    coverage_status = status_by_name.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"
    # loc: any file over the 500-line limit fails.
    loc_status = status_by_name.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"
    # docs_naming / gate_glue: roll up related summary entries when no
    # explicit entry exists.
    docs_naming_status = status_by_name.get("docs_naming")
    if not docs_naming_status:
        candidates = [status_by_name[key] for key in ["docs", "hygiene", "smell", "lint", "naming"] if key in status_by_name]
        docs_naming_status = _combine_statuses(candidates) if candidates else "not_applicable"
    gate_glue_status = status_by_name.get("gate_glue")
    if not gate_glue_status:
        candidates = [status_by_name[key] for key in ["gate_glue", "glue", "gate"] if key in status_by_name]
        gate_glue_status = _combine_statuses(candidates) if candidates else "not_applicable"
    # External reports are only consulted when the summary has no entry.
    sonarqube_status = status_by_name.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = status_by_name.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )
    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }

View File

@ -1,173 +0,0 @@
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
from __future__ import annotations
import argparse
import datetime as dt
import json
from pathlib import Path
from typing import Any
# Misconfiguration severities that fail the gate unless explicitly waived.
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
def _read_json(path: Path) -> dict[str, Any]:
"""Read a JSON object from disk for use as pipeline evidence."""
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"{path} must contain a JSON object")
return payload
def _parse_day(raw: str | None) -> dt.date | None:
"""Parse an ISO day while letting optional waiver dates stay optional."""
if not raw:
return None
return dt.date.fromisoformat(raw)
def _today(override: str | None = None) -> dt.date:
    """Return the policy day so tests can pin expiry behavior."""
    pinned = _parse_day(override)
    return pinned if pinned is not None else dt.date.today()
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
    """Return active ``(misconfiguration id, target)`` waivers and expired count.

    Args:
        path: Waiver JSON file; None or a missing file yields no waivers.
        policy_day: The day waiver expirations are evaluated against.
    """
    if path is None or not path.exists():
        return set(), 0
    payload = _read_json(path)
    # Entries without their own expires_at inherit the file-wide default.
    default_expires_at = payload.get("default_expires_at")
    active: set[tuple[str, str]] = set()
    expired = 0
    for entry in payload.get("misconfigurations", []):
        if not isinstance(entry, dict):
            continue
        misconfiguration_id = str(entry.get("id") or "").strip()
        if not misconfiguration_id:
            continue
        expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
        targets = entry.get("targets", [])
        if not isinstance(targets, list):
            continue
        # An expired entry contributes nothing active; each of its targets is
        # counted so the report can surface how many waivers lapsed.
        if expires_at and expires_at < policy_day:
            expired += len(targets)
            continue
        # Waivers are target-specific so a new unsafe manifest fails until it is
        # either fixed or deliberately accepted with a fresh expiration.
        for target in targets:
            if isinstance(target, str) and target:
                active.add((misconfiguration_id, target))
    return active, expired
def _iter_failed_misconfigurations(payload: dict[str, Any]):
    """Yield ``(target, record)`` for failed high/critical Trivy misconfigurations."""
    for result in payload.get("Results", []):
        if not isinstance(result, dict):
            continue
        target = str(result.get("Target") or "")
        for finding in result.get("Misconfigurations") or []:
            if not isinstance(finding, dict):
                continue
            severity = str(finding.get("Severity") or "").upper()
            if finding.get("Status") == "FAIL" and severity in FAIL_SEVERITIES:
                yield target, finding
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
"""Count Trivy vulnerabilities at a specific severity."""
count = 0
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
for item in result.get("Vulnerabilities") or []:
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
count += 1
return count
def _count_secrets(payload: dict[str, Any]) -> int:
"""Count detected secrets in the Trivy filesystem report."""
count = 0
for result in payload.get("Results", []):
if isinstance(result, dict):
count += len(result.get("Secrets") or [])
return count
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Build the compliance summary consumed by the quality gate.

    Args:
        trivy_payload: Parsed Trivy filesystem-scan JSON report.
        waiver_path: Optional JSON file of accepted misconfiguration waivers.
        today_override: Optional ISO day treated as "today" for waiver expiry
            (lets tests pin expiry behavior).

    Returns:
        A JSON-serializable dict; ``status`` is "ok" only when there are no
        critical vulnerabilities, no secrets, and no unwaived high/critical
        misconfigurations.
    """
    policy_day = _today(today_override)
    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, policy_day)
    open_misconfigs: list[dict[str, str]] = []
    waived_misconfigs = 0
    # Partition failed findings into waived vs. open, keyed by (id, target).
    for target, item in _iter_failed_misconfigurations(trivy_payload):
        misconfiguration_id = str(item.get("ID") or "")
        if (misconfiguration_id, target) in active_waivers:
            waived_misconfigs += 1
            continue
        open_misconfigs.append(
            {
                "id": misconfiguration_id,
                "target": target,
                "severity": str(item.get("Severity") or ""),
                "title": str(item.get("Title") or ""),
            }
        )
    critical = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high = _count_vulnerabilities(trivy_payload, "HIGH")
    secrets = _count_secrets(trivy_payload)
    # Policy: criticals, secrets, and open misconfigurations block the gate;
    # high vulnerabilities are reported but observed only (see field below).
    status = "ok" if critical == 0 and secrets == 0 and not open_misconfigs else "failed"
    return {
        "status": status,
        "compliant": status == "ok",
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical,
        "high_vulnerabilities": high,
        "high_vulnerability_policy": "observe",
        "secrets": secrets,
        "high_or_critical_misconfigurations": len(open_misconfigs),
        "waived_misconfigurations": waived_misconfigs,
        "expired_waivers": expired_waivers,
        "waiver_file": str(waiver_path) if waiver_path else "",
        # Cap examples so the report stays small even for noisy scans.
        "open_misconfiguration_examples": open_misconfigs[:20],
    }
def main(argv: list[str] | None = None) -> int:
    """CLI entrypoint used by Jenkins after the Trivy scan completes."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--trivy-json", required=True)
    parser.add_argument("--waivers")
    parser.add_argument("--output", required=True)
    parser.add_argument("--today")
    opts = parser.parse_args(argv)
    evidence = _read_json(Path(opts.trivy_json))
    waivers = Path(opts.waivers) if opts.waivers else None
    compliance = build_report(evidence, waiver_path=waivers, today_override=opts.today)
    destination = Path(opts.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(compliance, indent=2, sort_keys=True) + "\n"
    destination.write_text(serialized, encoding="utf-8")
    return 0
# Script entrypoint: propagate main()'s return code as the process exit code.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())

View File

@ -1,7 +1,6 @@
max_success_age_hours: 48
allow_suspended:
- bstein-dev-home/vaultwarden-cred-sync
- comms/guest-name-randomizer
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
@ -10,7 +9,6 @@ allow_suspended:
- health/wger-user-sync
- mailu-mailserver/mailu-sync-nightly
- nextcloud/nextcloud-mail-sync
- vault/vault-oidc-config
ariadne_schedule_tasks:
- schedule.mailu_sync
- schedule.nextcloud_sync

View File

@ -1,108 +0,0 @@
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
from __future__ import annotations
import os
from datetime import datetime, timezone
from pathlib import Path
import requests
import yaml
# The glue-check config lives next to this module.
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Parse the sibling config.yaml; an empty file yields an empty dict."""
    text = CONFIG_PATH.read_text(encoding="utf-8")
    return yaml.safe_load(text) or {}
def _query(promql: str) -> list[dict]:
    """Run an instant PromQL query against VictoriaMetrics and return the series."""
    base = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    response = requests.get(
        f"{base}/api/v1/query", params={"query": promql}, timeout=10
    )
    response.raise_for_status()
    body = response.json()
    return body.get("data", {}).get("result", [])
def _expected_tasks() -> list[dict]:
    """Load and normalize every configured Ariadne schedule task."""
    cfg = _load_config()
    normalized = [
        _normalize_task(entry, cfg)
        for entry in cfg.get("ariadne_schedule_tasks", [])
    ]
    assert normalized, "No Ariadne schedule tasks configured"
    return normalized
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_ariadne_schedule_series_exist():
    """Every configured task must export a next-run timestamp metric."""
    expected = _expected_tasks()
    selector = _task_regex(expected)
    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
    reported = {entry.get("metric", {}).get("task") for entry in series}
    absent = [entry["task"] for entry in expected if entry["task"] not in reported]
    assert not absent, f"Missing next-run metrics for: {', '.join(absent)}"
def test_ariadne_schedule_recent_success():
    """Tracked tasks must report a last-success metric that is not stale."""
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"
    now = datetime.now(timezone.utc)
    # Hours elapsed since each task's last recorded success, keyed by task name.
    age_by_task = {
        item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600
        for item in series
    }
    # Compare each task's age against its own configured freshness budget; the
    # walrus binds the task name once for both the membership test and the
    # threshold comparison.
    too_old = [
        f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)"
        for item in tasks
        if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"])
    ]
    assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)
def test_ariadne_schedule_last_status_present_and_boolean():
    """Last-status metrics must exist for tracked tasks and be strictly 0/1."""
    tracked = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tracked)
    series = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
    reported = {entry.get("metric", {}).get("task") for entry in series}
    missing = [entry["task"] for entry in tracked if entry["task"] not in reported]
    assert not missing, f"Missing last-status metrics for: {', '.join(missing)}"
    invalid = []
    for entry in series:
        sample = float(entry["value"][1])
        if sample not in (0.0, 1.0):
            invalid.append(f'{entry.get("metric", {}).get("task")}={sample}')
    assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"

View File

@ -1,5 +1,3 @@
"""Glue checks for the metrics the quality-gate publishes."""
from __future__ import annotations
import os
@ -25,63 +23,26 @@ def _query(promql: str) -> list[dict]:
return payload.get("data", {}).get("result", [])
def _expected_tasks() -> list[dict]:
cfg = _load_config()
tasks = [
_normalize_task(item, cfg)
for item in cfg.get("ariadne_schedule_tasks", [])
]
assert tasks, "No Ariadne schedule tasks configured"
return tasks
def test_glue_metrics_present():
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
assert series, "No glue cronjob label series found"
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
return tracked
def _task_regex(tasks: list[dict]) -> str:
return "|".join(item["task"] for item in tasks)
def test_glue_metrics_success_join():
query = (
"kube_cronjob_status_last_successful_time "
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
)
series = _query(query)
assert series, "No glue cronjob last success series found"
def test_ariadne_schedule_metrics_present():
tasks = _expected_tasks()
selector = _task_regex(tasks)
series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
seen = {item.get("metric", {}).get("task") for item in series}
missing = [item["task"] for item in tasks if item["task"] not in seen]
cfg = _load_config()
expected = cfg.get("ariadne_schedule_tasks", [])
if not expected:
return
series = _query("ariadne_schedule_next_run_timestamp_seconds")
tasks = {item.get("metric", {}).get("task") for item in series}
missing = [task for task in expected if task not in tasks]
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
def test_ariadne_schedule_success_and_status_metrics_present():
tasks = _tracked_tasks(_expected_tasks())
selector = _task_regex(tasks)
success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
success_tasks = {item.get("metric", {}).get("task") for item in success}
status_tasks = {item.get("metric", {}).get("task") for item in status}
expected = {item["task"] for item in tasks}
missing_success = sorted(expected - success_tasks)
missing_status = sorted(expected - status_tasks)
assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"

View File

@ -1,401 +0,0 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/atlasbot/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: atlasbot
namespace: ai
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(atlasbot): automated image update"
push:
branch: feature/atlasbot
update:
strategy: Setters
path: services/atlasbot

View File

@ -0,0 +1,17 @@
# clusters/atlas/flux-system/applications/atlasbot/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: atlasbot
namespace: flux-system
spec:
interval: 10m
prune: true
sourceRef:
kind: GitRepository
name: flux-system
path: ./services/atlasbot
targetNamespace: ai
timeout: 2m
dependsOn:
- name: ai-llm

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update"
push:
branch: main
branch: feature/atlasbot
update:
strategy: Setters
path: services/bstein-dev-home

View File

@ -0,0 +1,26 @@
# clusters/atlas/flux-system/applications/comms/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: comms
namespace: comms
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(comms): automated image update"
push:
branch: feature/atlasbot
update:
strategy: Setters
path: services/comms

View File

@ -13,8 +13,4 @@ spec:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: longhorn
- name: vault
- name: postgres
wait: true

View File

@ -16,6 +16,3 @@ spec:
wait: false
dependsOn:
- name: core
- name: longhorn
- name: vault
- name: postgres

View File

@ -25,4 +25,3 @@ spec:
name: jenkins
namespace: jenkins
wait: false
timeout: 20m

View File

@ -12,8 +12,4 @@ spec:
name: flux-system
path: ./services/keycloak
targetNamespace: sso
dependsOn:
- name: longhorn
- name: vault
- name: postgres
timeout: 2m

View File

@ -6,6 +6,9 @@ resources:
- vault/kustomization.yaml
- vaultwarden/kustomization.yaml
- comms/kustomization.yaml
- comms/image-automation.yaml
- atlasbot/kustomization.yaml
- atlasbot/image-automation.yaml
- crypto/kustomization.yaml
- monerod/kustomization.yaml
- pegasus/kustomization.yaml
@ -21,12 +24,10 @@ resources:
- sui-metrics/kustomization.yaml
- openldap/kustomization.yaml
- keycloak/kustomization.yaml
- quality/kustomization.yaml
- oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml
- jenkins/kustomization.yaml
- ai-llm/kustomization.yaml
- typhon/kustomization.yaml
- nextcloud/kustomization.yaml
- nextcloud-mail-sync/kustomization.yaml
- outline/kustomization.yaml

View File

@ -16,4 +16,4 @@ spec:
dependsOn:
- name: crypto
wait: true
timeout: 15m
timeout: 5m

View File

@ -1,35 +0,0 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: quality
namespace: flux-system
spec:
interval: 10m
path: ./services/quality
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: quality
dependsOn:
- name: traefik
- name: cert-manager
- name: keycloak
- name: vault
- name: postgres
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: sonarqube
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: sonarqube-exporter
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: oauth2-proxy-sonarqube
namespace: quality
wait: false
timeout: 20m

View File

@ -1,29 +0,0 @@
# clusters/atlas/flux-system/applications/typhon/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: typhon
namespace: flux-system
spec:
interval: 10m
path: ./services/typhon
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: climate
dependsOn:
- name: vault
- name: vault-csi
- name: monitoring
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: typhon
namespace: climate
- apiVersion: v1
kind: Service
name: typhon
namespace: climate
wait: false
timeout: 20m

View File

@ -15,5 +15,4 @@ spec:
prune: true
wait: true
dependsOn:
- name: longhorn
- name: helm

View File

@ -17,4 +17,3 @@ spec:
- name: crypto
- name: monerod
wait: true
timeout: 30m

View File

@ -9,7 +9,7 @@ metadata:
spec:
interval: 1m0s
ref:
branch: main
branch: feature/atlasbot
secretRef:
name: flux-system-gitea
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git

View File

@ -16,5 +16,6 @@ resources:
- longhorn/kustomization.yaml
- longhorn-ui/kustomization.yaml
- postgres/kustomization.yaml
- nats/kustomization.yaml
- ../platform/vault-csi/kustomization.yaml
- ../platform/vault-injector/kustomization.yaml

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/atlasbot
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(maintenance): automated image update"
push:
branch: main
branch: feature/atlasbot
update:
strategy: Setters
path: services/maintenance

View File

@ -0,0 +1,21 @@
# clusters/atlas/flux-system/platform/nats/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: nats
namespace: flux-system
spec:
interval: 10m
path: ./infrastructure/nats
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: nats
healthChecks:
- apiVersion: apps/v1
kind: StatefulSet
name: nats
namespace: nats
wait: true

View File

@ -14,7 +14,6 @@ spec:
name: flux-system
targetNamespace: postgres
dependsOn:
- name: longhorn
- name: vault
- name: vault-csi
healthChecks:

View File

@ -1,12 +0,0 @@
FROM debian:bookworm-slim
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
util-linux \
zstd \
&& rm -rf /var/lib/apt/lists/*
CMD ["/bin/sh"]

View File

@ -2,8 +2,4 @@ FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary \
&& groupadd --system guest-tools \
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
USER guest-tools
RUN pip install --no-cache-dir requests psycopg2-binary

View File

@ -1,8 +1,16 @@
# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
&& mkdir -p /var/log/data-prepper
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER 10001
WORKDIR /usr/share/data-prepper
CMD ["bin/data-prepper"]

View File

@ -1,13 +1,10 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
RUN apk add --no-cache ca-certificates
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -29,12 +29,10 @@ FROM ${DEBIAN_IMAGE}
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends ca-certificates; \
update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
groupadd --system p2pool; \
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
update-ca-certificates; rm -rf /var/lib/apt/lists/*
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
RUN /usr/local/bin/p2pool --version || true
EXPOSE 3333
USER p2pool
ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@ -26,12 +26,9 @@ RUN set -eux; \
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
rm -f /opt/monero/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
rm -f /opt/monero/monero.tar.bz2
ENV PATH="/usr/local/bin:/usr/bin:/bin"
RUN /usr/local/bin/monero-wallet-rpc --version || true
EXPOSE 18083
USER monero

View File

@ -23,14 +23,10 @@ RUN set -eux; \
mkdir -p /opt/monero; \
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
rm -f /tmp/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
mkdir -p /data; \
chown monero:monero /data; \
chmod 0770 /data
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
PATH="/opt/monero:${PATH}"
USER monero
CMD ["/opt/monero/monerod", "--version"]

View File

@ -1,13 +1,10 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S oauth2-proxy \
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
RUN apk add --no-cache ca-certificates
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER oauth2-proxy
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -1,13 +1,10 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S pegasus \
&& adduser -S -D -H -u 65532 -G pegasus pegasus
RUN apk add --no-cache ca-certificates
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER pegasus
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -1,48 +0,0 @@
# dockerfiles/Dockerfile.quality-tools
FROM debian:bookworm-slim
ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0
ENV TRIVY_CACHE_DIR=/opt/trivy-cache
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
git \
jq \
unzip \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd --system quality-tools \
&& useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
RUN set -eux; \
scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
printf '%s %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
unzip -q "/tmp/${scanner_zip}" -d /opt; \
ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
RUN set -eux; \
trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
curl -fsSL "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
rm -f "/tmp/${trivy_tgz}"; \
trivy --version; \
sonar-scanner -v
RUN set -eux; \
mkdir -p "${TRIVY_CACHE_DIR}"; \
trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
mkdir -p /workspace; \
chown quality-tools:quality-tools /workspace
WORKDIR /workspace
USER quality-tools

View File

@ -0,0 +1,3 @@
FROM python:3.11-slim
RUN pip install --no-cache-dir psycopg2-binary bcrypt

View File

@ -27,42 +27,10 @@ spec:
timeout: 10m
values:
installCRDs: true
extraArgs:
- --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -76,36 +44,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -119,36 +57,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:

View File

@ -26,7 +26,7 @@ spec:
spec:
containers:
- name: coredns
image: registry.k8s.io/coredns/coredns:v1.12.1
image: registry.bstein.dev/infra/coredns:1.12.1
imagePullPolicy: IfNotPresent
args:
- -conf

View File

@ -6,6 +6,7 @@ resources:
- ../modules/profiles/atlas-ha
- coredns-custom.yaml
- coredns-deployment.yaml
- longhorn-node-taints.yaml
- ntp-sync-daemonset.yaml
- ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -0,0 +1,40 @@
# infrastructure/core/longhorn-node-taints.yaml
apiVersion: v1
kind: Node
metadata:
name: titan-13
spec:
taints:
- key: longhorn
value: "true"
effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
name: titan-15
spec:
taints:
- key: longhorn
value: "true"
effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
name: titan-17
spec:
taints:
- key: longhorn
value: "true"
effect: PreferNoSchedule
---
apiVersion: v1
kind: Node
metadata:
name: titan-19
spec:
taints:
- key: longhorn
value: "true"
effect: PreferNoSchedule

View File

@ -0,0 +1,10 @@
# infrastructure/longhorn/core/backup-target.yaml
apiVersion: longhorn.io/v1beta2
kind: BackupTarget
metadata:
name: default
namespace: longhorn-system
spec:
backupTargetURL: "s3://atlas-soteria@us-west-004/"
credentialSecret: longhorn-backup-b2
pollInterval: 5m0s

View File

@ -6,6 +6,39 @@ metadata:
namespace: longhorn-system
spec:
interval: 30m
postRenderers:
- kustomize:
patches:
- target:
kind: Service
name: longhorn-conversion-webhook
namespace: longhorn-system
patch: |
- op: add
path: /spec/publishNotReadyAddresses
value: true
- target:
kind: Service
name: longhorn-admission-webhook
namespace: longhorn-system
patch: |
- op: add
path: /spec/publishNotReadyAddresses
value: true
- target:
kind: DaemonSet
name: longhorn-manager
namespace: longhorn-system
patch: |
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/path
value: /v1/healthz
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/port
value: 9500
- op: replace
path: /spec/template/spec/containers/0/readinessProbe/httpGet/scheme
value: HTTP
chart:
spec:
chart: longhorn
@ -26,9 +59,6 @@ spec:
cleanupOnFail: true
timeout: 15m
values:
global:
nodeSelector:
longhorn-host: "true"
service:
ui:
type: NodePort
@ -37,7 +67,7 @@ spec:
createSecret: false
registrySecret: longhorn-registry
image:
pullPolicy: Always
pullPolicy: IfNotPresent
longhorn:
engine:
repository: registry.bstein.dev/infra/longhorn-engine
@ -80,13 +110,4 @@ spec:
repository: registry.bstein.dev/infra/longhorn-livenessprobe
tag: v2.16.0
defaultSettings:
systemManagedPodsImagePullPolicy: Always
longhornManager:
nodeSelector:
longhorn-host: "true"
longhornDriver:
nodeSelector:
longhorn-host: "true"
longhornUI:
nodeSelector:
longhorn-host: "true"
systemManagedPodsImagePullPolicy: IfNotPresent

View File

@ -6,17 +6,14 @@ resources:
- vault-serviceaccount.yaml
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- backup-target.yaml
- helmrelease.yaml
- longhorn-settings-ensure-job.yaml
- longhorn-disk-tags-ensure-job.yaml
configMapGenerator:
- name: longhorn-settings-ensure-script
files:
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
- name: longhorn-disk-tags-ensure-script
files:
- longhorn_disk_tags_ensure.py=scripts/longhorn_disk_tags_ensure.py
generatorOptions:
disableNameSuffixHash: true

View File

@ -1,36 +0,0 @@
# infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-disk-tags-ensure-1
namespace: longhorn-system
spec:
backoffLimit: 0
ttlSecondsAfterFinished: 3600
template:
spec:
serviceAccountName: longhorn-service-account
restartPolicy: Never
volumes:
- name: longhorn-disk-tags-ensure-script
configMap:
name: longhorn-disk-tags-ensure-script
defaultMode: 0555
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
- key: node-role.kubernetes.io/worker
operator: Exists
containers:
- name: apply
image: python:3.12.9-alpine3.20
command: ["python", "/scripts/longhorn_disk_tags_ensure.py"]
volumeMounts:
- name: longhorn-disk-tags-ensure-script
mountPath: /scripts
readOnly: true

View File

@ -2,11 +2,10 @@
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-settings-ensure-7
name: longhorn-settings-ensure-4
namespace: longhorn-system
spec:
backoffLimit: 0
activeDeadlineSeconds: 240
ttlSecondsAfterFinished: 3600
template:
spec:

View File

@ -1,100 +0,0 @@
#!/usr/bin/env python3
"""Reconcile Longhorn disk tags for the Titan longhorn storage classes.
The astreae/asteria storageclasses select Longhorn disks by tag. The current
nodes already have the right disk paths, but the tag fields can drift to empty
after node recovery. This job patches the live Longhorn Node CRs back to the
expected tags so PVC provisioning keeps working.
"""
from __future__ import annotations
import json
import os
import ssl
import urllib.request
LONGHORN_NS = "longhorn-system"
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
DESIRED_TAGS = {
"/mnt/astreae": "astreae",
"/mnt/asteria": "asteria",
}
def api_base() -> str:
host = os.environ.get("KUBERNETES_SERVICE_HOST")
port = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
if not host:
raise SystemExit("missing KUBERNETES_SERVICE_HOST")
return f"https://{host}:{port}"
def token() -> str:
path = "/var/run/secrets/kubernetes.io/serviceaccount/token"
with open(path, "r", encoding="utf-8") as fh:
return fh.read().strip()
def ca_context() -> ssl.SSLContext:
cafile = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
return ssl.create_default_context(cafile=cafile)
def request_json(method: str, path: str, body: dict | None = None) -> dict:
req = urllib.request.Request(
f"{api_base()}{path}",
method=method,
headers={
"Authorization": f"Bearer {token()}",
"Content-Type": "application/merge-patch+json",
"Accept": "application/json",
},
data=None if body is None else json.dumps(body).encode("utf-8"),
)
with urllib.request.urlopen(req, context=ca_context(), timeout=20) as resp:
payload = resp.read()
return json.loads(payload) if payload else {}
def list_nodes() -> list[dict]:
data = request_json("GET", LONGHORN_API.format(namespace=LONGHORN_NS))
return data.get("items", [])
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
body = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
request_json(
"PATCH",
f"{LONGHORN_API.format(namespace=LONGHORN_NS)}/{node_name}",
body=body,
)
def main() -> int:
changed = 0
skipped = 0
for node in list_nodes():
name = node.get("metadata", {}).get("name", "")
spec_disks = node.get("spec", {}).get("disks", {}) or {}
for disk_name, disk in spec_disks.items():
disk_path = disk.get("path")
desired_tag = DESIRED_TAGS.get(disk_path)
if not desired_tag:
continue
current_tags = disk.get("tags") or []
if current_tags == [desired_tag]:
skipped += 1
continue
print(f"patching {name}:{disk_name} path={disk_path} tags={current_tags!r} -> {[desired_tag]!r}")
patch_disk_tags(name, disk_name, desired_tag)
changed += 1
print(f"done: changed={changed} skipped={skipped}")
return 0
if __name__ == "__main__":
raise SystemExit(main())

View File

@ -4,12 +4,11 @@ set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
curl_opts="-fsS --connect-timeout 3 --max-time 15"
wait_for_api() {
attempts=30
while [ "${attempts}" -gt 0 ]; do
if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
if curl -fsS "${api_base}" >/dev/null 2>&1; then
return 0
fi
attempts=$((attempts - 1))
@ -23,14 +22,14 @@ update_setting() {
name="$1"
value="$2"
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
current="$(curl -fsS "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} already set."
return 0
fi
echo "Setting ${name} -> ${value}"
curl ${curl_opts} -X PUT \
curl -fsS -X PUT \
-H "Content-Type: application/json" \
-d "{\"value\":\"${value}\"}" \
"${api_base}/${name}" >/dev/null
@ -41,7 +40,3 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
update_setting replica-auto-balance "best-effort"
update_setting concurrent-replica-rebuild-per-node-limit "2"
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"

View File

@ -13,13 +13,13 @@ spec:
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull"
secretKey: "dockerconfigjson"
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
- objectName: "longhorn_backup__AWS_ACCESS_KEY_ID"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ACCESS_KEY_ID"
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn_backup__AWS_SECRET_ACCESS_KEY"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
- objectName: "longhorn_backup__AWS_ENDPOINTS"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ENDPOINTS"
secretObjects:
@ -31,9 +31,9 @@ spec:
- secretName: longhorn-backup-b2
type: Opaque
data:
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
- objectName: longhorn_backup__AWS_ACCESS_KEY_ID
key: AWS_ACCESS_KEY_ID
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
- objectName: longhorn_backup__AWS_SECRET_ACCESS_KEY
key: AWS_SECRET_ACCESS_KEY
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
- objectName: longhorn_backup__AWS_ENDPOINTS
key: AWS_ENDPOINTS

View File

@ -26,16 +26,6 @@ spec:
- key: hardware
operator: In
values: ["rpi5", "rpi4"]
- weight: 90
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
containers:
- name: sync
image: alpine:3.20

View File

@ -78,7 +78,6 @@ spec:
- --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local
- --http-address=0.0.0.0:4180
- --skip-provider-button=true
- --approval-prompt=auto
- --skip-jwt-bearer-tokens=true
- --oidc-groups-claim=groups
- --cookie-domain=longhorn.bstein.dev

View File

@ -0,0 +1,17 @@
# ConfigMap holding the NATS server configuration; mounted by the nats
# StatefulSet at /etc/nats and loaded via `-c /etc/nats/nats.conf`.
apiVersion: v1
kind: ConfigMap
metadata:
  name: nats-config
  namespace: nats
  labels:
    app: nats
    component: config
  annotations:
    description: "NATS JetStream configuration"
data:
  # JetStream persistence: /data is backed by the StatefulSet's PVC;
  # memory/file store caps bound resource usage on small nodes.
  nats.conf: |
    jetstream {
      store_dir: /data
      max_mem_store: 128MB
      max_file_store: 1GB
    }

View File

@ -0,0 +1,7 @@
# Kustomize entry point for the NATS stack; namespace is listed first so it
# exists before the namespaced resources are applied.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - namespace.yaml
  - configmap.yaml
  - service.yaml
  - statefulset.yaml

View File

@ -0,0 +1,4 @@
# Dedicated namespace for the NATS JetStream deployment.
apiVersion: v1
kind: Namespace
metadata:
  name: nats

View File

@ -0,0 +1,17 @@
# ClusterIP Service exposing the NATS client protocol (4222) and the HTTP
# monitoring endpoint (8222) to in-cluster consumers.
apiVersion: v1
kind: Service
metadata:
  name: nats
  namespace: nats
  labels:
    app: nats
spec:
  selector:
    app: nats
  ports:
    - name: client
      port: 4222
      targetPort: 4222
    - name: monitoring
      port: 8222
      targetPort: 8222

View File

@ -0,0 +1,54 @@
# Single-replica NATS server with JetStream persistence.
# Config comes from the nats-config ConfigMap; /data is a per-replica PVC
# matching the jetstream store_dir in nats.conf.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nats
  namespace: nats
  labels:
    app: nats
spec:
  serviceName: nats
  replicas: 1
  selector:
    matchLabels:
      app: nats
  template:
    metadata:
      labels:
        app: nats
    spec:
      containers:
        - name: nats
          image: nats:2.10.18
          # Load the rendered server config from the ConfigMap mount.
          args:
            - "-c"
            - "/etc/nats/nats.conf"
          ports:
            - name: client
              containerPort: 4222
            - name: monitoring
              containerPort: 8222
          volumeMounts:
            - name: config
              mountPath: /etc/nats
            - name: data
              mountPath: /data
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
      volumes:
        - name: config
          configMap:
            name: nats-config
  # Persistent volume for the JetStream file store (sized above the 1GB
  # max_file_store cap in nats.conf).
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 2Gi

View File

@ -25,7 +25,6 @@ spec:
serviceAccountName: postgres-vault
nodeSelector:
node-role.kubernetes.io/worker: "true"
hardware: rpi5
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
@ -36,17 +35,7 @@ spec:
values: ["true"]
- key: hardware
operator: In
values: ["rpi5"]
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-06"]
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: In
values: ["titan-05", "titan-07", "titan-08", "titan-11"]
values: ["rpi4", "rpi5"]
containers:
- name: postgres
image: postgres:15

View File

@ -2,7 +2,7 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: atlas-traefik-ingress-controller
name: traefik-ingress-controller
rules:
- apiGroups:
- ""

View File

@ -2,12 +2,12 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: atlas-traefik-ingress-controller
name: traefik-ingress-controller
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: atlas-traefik-ingress-controller
name: traefik-ingress-controller
subjects:
- kind: ServiceAccount
name: atlas-traefik-ingress-controller
name: traefik-ingress-controller
namespace: traefik

View File

@ -70,42 +70,10 @@ items:
dnsPolicy: ClusterFirst
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
restartPolicy: Always
schedulerName: default-scheduler
serviceAccount: atlas-traefik-ingress-controller
serviceAccountName: atlas-traefik-ingress-controller
serviceAccount: traefik-ingress-controller
serviceAccountName: traefik-ingress-controller
terminationGracePeriodSeconds: 30
kind: List
metadata: {}

View File

@ -1,9 +0,0 @@
# infrastructure/traefik/ingressclass.yaml
apiVersion: networking.k8s.io/v1
kind: IngressClass
metadata:
name: traefik
annotations:
ingressclass.kubernetes.io/is-default-class: "true"
spec:
controller: traefik.io/ingress-controller

View File

@ -6,7 +6,6 @@ metadata:
namespace: flux-system
resources:
- crds.yaml
- ingressclass.yaml
- deployment.yaml
- serviceaccount.yaml
- clusterrole.yaml

View File

@ -2,5 +2,5 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: atlas-traefik-ingress-controller
name: traefik-ingress-controller
namespace: traefik

View File

@ -41,12 +41,3 @@ spec:
failurePolicy: Ignore
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-13", "titan-15", "titan-17", "titan-19"]

View File

@ -1,152 +0,0 @@
Atlas Cluster Power Recovery (Graceful Shutdown/Startup)
Purpose
- Provide a safe operator flow for planned power events and cold-boot recovery.
- Avoid the Flux/Gitea bootstrap deadlock by using a local bootstrap fallback path.
- Break the Harbor self-hosting deadlock by seeding Harbor runtime images from a control-host bundle.
- Refuse bootstrap when UPS charge is too low, and fall back to fast shutdown if a second outage hits mid-recovery.
Bootstrapping risk to remember
- Flux source is Git over SSH to `scm.bstein.dev` (Gitea).
- Gitea itself is a Flux-managed workload and depends on storage + database.
- Harbor is also critical, but it is not part of the first recovery stage because Harbor serves its own runtime images.
- On cold boot, if Flux cannot fetch source before Gitea is up, reconciliation can stall.
- Recovery path: bring control plane and workers up, then locally apply minimal platform stack (`core -> helm -> longhorn -> metallb -> traefik -> vault-csi -> vault-injector -> vault -> postgres -> gitea`), then seed Harbor images onto the Harbor node from a control-host bundle, then resume/reconcile Flux. Harbor is a later recovery stage after storage, Vault, Postgres, and Gitea are back.
Script
- `scripts/cluster_power_recovery.sh`
- `scripts/cluster_power_console.sh`
- Modes:
- `prepare`
- `shutdown`
- `harbor-seed`
- `startup`
- `status`
- Default is dry-run. Add `--execute` to actually perform actions.
Dry-run examples
- Shutdown preview:
- `scripts/cluster_power_recovery.sh shutdown --skip-etcd-snapshot --skip-drain`
- Startup preview:
- `scripts/cluster_power_recovery.sh startup`
- Harbor seed preview:
- `scripts/cluster_power_recovery.sh harbor-seed`
Execute examples
- Prepare helper image on every node:
- `scripts/cluster_power_recovery.sh prepare --execute`
- Seed Harbor runtime images onto `titan-05` from the control-host bundle:
- `scripts/cluster_power_recovery.sh harbor-seed --execute`
- Planned shutdown:
- `scripts/cluster_power_recovery.sh shutdown --execute`
- Planned startup (canonical branch):
- `scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main`
Manual remote console examples
- Canonical operator hosts:
- `titan-db`
- `tethys` (`titan-24`)
- Both hosts now have:
- `~/ananke-tools/cluster_power_recovery.sh`
- `~/ananke-tools/cluster_power_console.sh`
- `~/ananke-tools/bootstrap/recovery-config.env`
- `~/ananke-tools/bootstrap/harbor-bootstrap-images.txt`
- `~/ananke-tools/kubeconfig`
- `~/ananke-cluster-power`
- `~/bin/ananke-cluster-power`
- `~/ananke-repo/{infrastructure,services,scripts}`
- Both hosts also keep the Harbor bootstrap bundle at:
- `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
- Remote usage:
- `ssh titan-db`
- `~/ananke-cluster-power status`
- `~/ananke-cluster-power prepare --execute`
- `~/ananke-cluster-power shutdown --execute`
- `~/ananke-cluster-power startup --execute --force-flux-branch main`
- `ssh tethys`
- `~/ananke-cluster-power status`
- `~/ananke-cluster-power prepare --execute`
- `~/ananke-cluster-power shutdown --execute`
- `~/ananke-cluster-power startup --execute --force-flux-branch main`
Useful options
- `--shutdown-mode host-poweroff|cluster-only`
- `--expected-flux-branch main`
- `--expected-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
- `--force-flux-branch main`
- `--allow-flux-source-mutation` (required with `--force-flux-url`; breakglass only)
- `--skip-local-bootstrap` (not recommended for cold-start recovery)
- `--skip-harbor-bootstrap` (skip the Harbor recovery stage if you know Harbor should stay deferred)
- `--skip-harbor-seed` (skip bundle import if Harbor images are already cached on the target node)
- `--skip-helper-prewarm`
- `--min-startup-battery 35`
- `--ups-host pyrphoros@localhost`
- `--require-ups-battery`
- `--drain-timeout 180`
- `--emergency-drain-timeout 45`
- `--flux-ready-timeout 1200`
- `--startup-checklist-timeout 900`
- `--startup-stability-window 180`
- `--startup-stability-timeout 900`
- `--recovery-state-file ~/.local/share/ananke/cluster_power_recovery.state`
- `--harbor-bundle-file ~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
Controlled drill checklist (recommended)
- Operator host: use `titan-db` as canonical control host for the drill.
- On-site coordination:
- Have on-site operator ready before shutdown starts.
- Confirm they will manually power cluster nodes back on after shutdown completes.
- Confirm who will announce "all nodes powered on" to resume startup.
- Preflight on `titan-db`:
- `mkdir -p ~/ananke-logs`
- `~/ananke-cluster-power status` and verify:
- `ups_host=pyrphoros@localhost`
- `ups_battery` is numeric
- `flux_source_ready=True`
- Warm helper image just before shutdown:
- `~/ananke-cluster-power prepare --execute`
- Run in a persistent shell and capture logs:
- `tmux new -s ananke-drill`
- `script -q -a ~/ananke-logs/ananke-drill-$(date +%Y%m%d-%H%M%S).log`
- Execute controlled shutdown with telemetry enforcement:
- `~/ananke-cluster-power shutdown --execute --require-ups-battery`
- After on-site power-on confirmation, execute startup:
- `~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery`
- Post-check:
- `~/ananke-cluster-power status`
- Verify critical services (`longhorn`, `vault`, `postgres`, `gitea`, `harbor`, `pegasus`) and no widespread pull/crash failures.
Operational notes
- The flow suspends Flux Kustomizations/HelmReleases during shutdown to prevent churn.
- Shutdown behavior is explicit:
- `host-poweroff` schedules host poweroff after service stop.
- `cluster-only` stops `k3s`/`k3s-agent` without powering hosts off.
- Worker drain is no longer best-effort. If the configured timeout is exhausted, the script escalates from a normal drain, to `--force`, and finally to `--disable-eviction`.
- Startup fails fast if Flux source URL/branch drift from expected values (unless branch override is explicitly requested with `--force-flux-branch`).
- Flux desired-state source remains `titan-iac.git`. Ananke orchestrates runtime recovery and should not be used as the normal Flux source repo.
- During startup, if Flux source is not `Ready`, local bootstrap fallback is applied first using the repo snapshot under `~/ananke-repo`.
- Longhorn is reconciled before Vault/Postgres/Gitea so storage-backed services are not racing the volume layer.
- Harbor is reconciled after the first critical stateful services.
- Harbor bootstrap is now designed around a control-host bundle:
- Build the Harbor bundle locally with `scripts/build_harbor_bootstrap_bundle.sh`.
- Stage it on the operator host at `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`.
- Use `harbor-seed --execute` or a full `startup --execute` to stream/import that bundle onto `titan-05`.
- The Harbor bundle remains arm64-only because Harbor is pinned to arm64 nodes. The node-helper image is multi-arch because Ananke uses it across both arm64 and amd64 nodes during prepare/shutdown operations.
- Ananke uses a temporary privileged helper pod for host-side operations. The helper image is prewarmed with `prepare --execute` so later shutdown/startup steps do not stall on image pulls.
- The script persists outage state in `~/.local/share/ananke/cluster_power_recovery.state` by default. If startup is attempted during an outage window and power becomes unstable again, rerunning startup with insufficient UPS charge will flip into the emergency shutdown path instead of continuing to bootstrap.
- Startup completion is strict now:
- all non-optional Flux kustomizations must be `Ready=True`
- external service checklist must pass (defaults include Gitea, Grafana, Harbor)
- generated ingress reachability checks must pass (default accepted codes: `200,301,302,307,308,401,403,404`)
- stability soak must pass with no crashloop/pull-failure churn
- If Flux hits immutable one-off Job drift during reconcile, Ananke now attempts self-heal by pruning failed Flux-managed Jobs and retrying reconcile.
- In dry-run mode, the script now skips the live API wait step so preview runs do not stall on an offline cluster.
- Dry-run mode no longer mutates outage recovery state.
- `harbor-seed --execute` was validated by:
- prewarming the helper image across all nodes
- streaming the Harbor bootstrap bundle to `titan-05`
- importing Harbor runtime images into host `containerd`
- successfully running a Harbor-backed canary pod (`harbor-canary-ok`)
- After bootstrap, Flux resources are resumed and reconciled.
- Keep this runbook aligned with `clusters/atlas/flux-system/gotk-sync.yaml`.

View File

@ -1,3 +0,0 @@
[pytest]
addopts = -ra
norecursedirs = .git .venv .venv-ci __pycache__ tmp

View File

@ -1,9 +0,0 @@
# Harbor cold-start bootstrap images.
registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64
registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64
registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64
registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64
registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64

View File

@ -1,36 +0,0 @@
# Ananke cluster power-recovery defaults. Sourced by the recovery tooling;
# most values can be overridden per-run with CLI flags (see the runbook).

# Operator host and Flux source-of-truth expectations.
CANONICAL_CONTROL_HOST="titan-db"
DEFAULT_FLUX_BRANCH="main"
EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
SHUTDOWN_MODE="host-poweroff"
STATE_SUBDIR=".local/share/ananke"

# Harbor cold-start bootstrap: bundle streamed/imported onto the Harbor node.
HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
HARBOR_TARGET_NODE=""
HARBOR_CANARY_NODE=""
HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap"
HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0"

# Privileged node-helper pod used for host-side operations during prepare/shutdown.
NODE_HELPER_IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
NODE_HELPER_NAMESPACE="maintenance"
NODE_HELPER_SERVICE_ACCOUNT="default"
REGISTRY_PULL_SECRET="harbor-regcred"
BUNDLE_HTTP_PORT="8877"

# UPS telemetry endpoint and the key queried for charge level.
# NOTE(review): looks like a NUT-style ups@host target with battery.charge as
# a percentage — confirm against the query logic in cluster_power_recovery.sh.
UPS_HOST="pyrphoros@localhost"
UPS_BATTERY_KEY="battery.charge"

# Startup gating knobs: Flux readiness, external-service checklist, workload
# readiness, and the post-startup stability soak. All *_SECONDS are seconds;
# *_REGEX values are matchers for exclusions (empty = no exclusion).
FLUX_READY_TIMEOUT_SECONDS="1200"
FLUX_READY_POLL_SECONDS="10"
STARTUP_CHECKLIST_TIMEOUT_SECONDS="900"
STARTUP_CHECKLIST_POLL_SECONDS="10"
STARTUP_WORKLOAD_TIMEOUT_SECONDS="900"
STARTUP_WORKLOAD_POLL_SECONDS="10"
STARTUP_STABILITY_WINDOW_SECONDS="180"
STARTUP_STABILITY_TIMEOUT_SECONDS="900"
STARTUP_STABILITY_POLL_SECONDS="10"
STARTUP_OPTIONAL_KUSTOMIZATIONS=""
STARTUP_IGNORE_PODS_REGEX=""
STARTUP_IGNORE_WORKLOADS_REGEX=""
STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="^(kube-system|kube-public|kube-node-lease|flux-system)$"
STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="10"
STARTUP_INCLUDE_INGRESS_CHECKS="1"
STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
# Semicolon-separated checklist entries; fields appear to be
# name|url|allowed-codes|required-substring|... — verify against the parser
# in cluster_power_recovery.sh before editing.
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'

View File

@ -1,56 +0,0 @@
#!/usr/bin/env bash
# Build and push the multi-arch Ananke node-helper image via docker buildx.
# Optional flags override the image tag, docker config dir, target platform
# list, and buildx builder name.
set -euo pipefail

IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
DOCKER_CONFIG_PATH=""
PLATFORMS="linux/amd64,linux/arm64"
BUILDER_NAME="ananke-node-helper-builder"

# Each long option takes a mandatory value; ${2:?...} aborts with a message
# when the value is missing.
while [[ $# -gt 0 ]]; do
  flag="$1"
  case "${flag}" in
    --image)         IMAGE="${2:?missing image}";                      shift 2 ;;
    --docker-config) DOCKER_CONFIG_PATH="${2:?missing docker config path}"; shift 2 ;;
    --platforms)     PLATFORMS="${2:?missing platforms}";              shift 2 ;;
    --builder)       BUILDER_NAME="${2:?missing builder}";             shift 2 ;;
    -h|--help)
      cat <<USAGE
Usage: scripts/build_ananke_node_helper.sh [--image <image>] [--docker-config <path>] [--platforms <csv>] [--builder <name>]
USAGE
      exit 0
      ;;
    *)
      echo "Unknown option: ${flag}" >&2
      exit 1
      ;;
  esac
done

# Point docker at an alternate credential store when requested.
[[ -z "${DOCKER_CONFIG_PATH}" ]] || export DOCKER_CONFIG="${DOCKER_CONFIG_PATH}"

# Select the dedicated buildx builder, creating it on first use.
if docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then
  docker buildx use "${BUILDER_NAME}" >/dev/null
else
  docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use >/dev/null
fi
docker buildx inspect --bootstrap >/dev/null

# Cross-build for every requested platform and push in one step.
docker buildx build \
  --platform "${PLATFORMS}" \
  -f dockerfiles/Dockerfile.ananke-node-helper \
  -t "${IMAGE}" \
  --push \
  .

View File

@ -1,58 +0,0 @@
#!/usr/bin/env bash
# Build the Harbor cold-start bootstrap bundle: pull the pinned Harbor runtime
# images for the target platform and pack them into a zstd-compressed tarball
# that the recovery tooling streams onto the Harbor node.
#
# Fixes over the previous revision:
#  - preflight checks for docker/zstd and a clear error for a missing images
#    file (a missing file previously surfaced as the misleading "No images
#    found" because the grep failure was swallowed by process substitution)
#  - `zstd -f` so re-runs overwrite a stale bundle instead of aborting
set -euo pipefail

IMAGES_FILE="scripts/bootstrap/harbor-bootstrap-images.txt"
BUNDLE_FILE="artifacts/harbor-bootstrap-v2.14.1-arm64.tar.zst"
DOCKER_CONFIG_PATH=""
PLATFORM="linux/arm64"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --images-file)
      IMAGES_FILE="${2:?missing images file}"
      shift 2
      ;;
    --bundle-file)
      BUNDLE_FILE="${2:?missing bundle file}"
      shift 2
      ;;
    --docker-config)
      DOCKER_CONFIG_PATH="${2:?missing docker config path}"
      shift 2
      ;;
    --platform)
      PLATFORM="${2:?missing platform}"
      shift 2
      ;;
    -h|--help)
      cat <<USAGE
Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>]
USAGE
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Fail fast with actionable messages before any slow pulls start.
command -v docker >/dev/null 2>&1 || { echo "docker is required" >&2; exit 1; }
command -v zstd >/dev/null 2>&1 || { echo "zstd is required" >&2; exit 1; }
if [[ ! -f "${IMAGES_FILE}" ]]; then
  echo "Images file not found: ${IMAGES_FILE}" >&2
  exit 1
fi

if [[ -n "${DOCKER_CONFIG_PATH}" ]]; then
  export DOCKER_CONFIG="${DOCKER_CONFIG_PATH}"
fi

# Load image refs, dropping comment lines and blank lines.
mapfile -t IMAGES < <(grep -v '^[[:space:]]*#' "${IMAGES_FILE}" | sed '/^[[:space:]]*$/d')
if [[ ${#IMAGES[@]} -eq 0 ]]; then
  echo "No images found in ${IMAGES_FILE}" >&2
  exit 1
fi

mkdir -p "$(dirname "${BUNDLE_FILE}")"
for image in "${IMAGES[@]}"; do
  echo "Pulling ${image}" >&2
  docker pull --platform "${PLATFORM}" "${image}" >/dev/null
done

# -T0: use all cores; -19: high compression; -f: overwrite an existing bundle.
docker save "${IMAGES[@]}" | zstd -T0 -19 -f -o "${BUNDLE_FILE}"
echo "Wrote ${BUNDLE_FILE}" >&2

View File

@ -1,87 +0,0 @@
#!/usr/bin/env bash
# Friendly manual entrypoint for running the cluster power-recovery script
# from a remote console. Runs the recovery script locally when one is usable,
# otherwise delegates the whole invocation over SSH to a control host
# (titan-db by default). Quoting of delegated args is load-bearing; see the
# printf '%q' usage below.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
usage() {
  cat <<'USAGE'
Usage:
  scripts/cluster_power_console.sh [--repo-dir <path>] [--delegate-host <host>] <shutdown|startup> [recovery-script-options...]
Purpose:
  Friendly manual entrypoint for running Ananke from a remote console.
  Canonical control host is titan-db by default so bundle/state handling stays in one place.
Defaults:
  --repo-dir $HOME/Development/ananke (fallback: $HOME/Development/titan-iac)
  --delegate-host titan-db
Examples:
  scripts/cluster_power_console.sh shutdown --execute
  scripts/cluster_power_console.sh startup --execute --force-flux-branch main
  scripts/cluster_power_console.sh --delegate-host titan-24 shutdown --execute
USAGE
}
# Prefer the ananke checkout; fall back to the titan-iac checkout.
if [[ -d "${HOME}/Development/ananke" ]]; then
  REPO_DIR="${HOME}/Development/ananke"
else
  REPO_DIR="${HOME}/Development/titan-iac"
fi
DELEGATE_HOST="titan-db"
REMOTE_REPO_DIR="${ANANKE_REMOTE_REPO_DIR:-}"
# Parse our own options; the first unrecognized word (the mode) and everything
# after it is passed through to the recovery script untouched.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --repo-dir)
      REPO_DIR="${2:-}"
      shift 2
      ;;
    --delegate-host)
      DELEGATE_HOST="${2:-}"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      break
      ;;
  esac
done
if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi
# Local execution path: use the sibling script next to this one, or the repo
# copy, but only when kubectl is available on this host.
SIBLING_SCRIPT="${SCRIPT_DIR}/cluster_power_recovery.sh"
REPO_SCRIPT="${REPO_DIR}/scripts/cluster_power_recovery.sh"
LOCAL_SCRIPT=""
if [[ -x "${SIBLING_SCRIPT}" ]]; then
  LOCAL_SCRIPT="${SIBLING_SCRIPT}"
elif [[ -x "${REPO_SCRIPT}" ]]; then
  LOCAL_SCRIPT="${REPO_SCRIPT}"
fi
if [[ -n "${LOCAL_SCRIPT}" ]] && command -v kubectl >/dev/null 2>&1; then
  exec "${LOCAL_SCRIPT}" "$@"
fi
if [[ -z "${DELEGATE_HOST}" ]]; then
  echo "cluster-power-console: no usable local recovery script found and no delegate host configured" >&2
  exit 1
fi
# Delegation path: shell-quote every argument so the remote command survives
# the ssh round-trip intact, then try the staged tools copy first and the
# repo checkout second on the remote host.
quoted_args="$(printf '%q ' "$@")"
quoted_repo_dir="$(printf '%q' "${REPO_DIR}")"
remote_cmd=""
if [[ -n "${REMOTE_REPO_DIR}" ]]; then
  remote_cmd+="ANANKE_REPO_DIR=$(printf '%q' "${REMOTE_REPO_DIR}") "
fi
remote_cmd+="if [ -x ~/ananke-tools/cluster_power_recovery.sh ]; then ~/ananke-tools/cluster_power_recovery.sh ${quoted_args}; elif [ -x ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ]; then ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ${quoted_args}; else echo 'cluster-power-console: remote recovery script not found' >&2; exit 1; fi"
# BatchMode avoids interactive auth prompts hanging an unattended run.
exec ssh -o BatchMode=yes -o ConnectTimeout=8 "${DELEGATE_HOST}" "${remote_cmd}"

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -539,9 +539,9 @@ def main() -> int:
help="Write generated files (otherwise just print a summary).",
)
ap.add_argument(
"--sync-comms",
"--sync-atlasbot",
action="store_true",
help="Mirror rendered knowledge into services/comms/knowledge for atlasbot.",
help="Mirror rendered knowledge into services/atlasbot/knowledge for atlasbot.",
)
args = ap.parse_args()
@ -632,10 +632,10 @@ def main() -> int:
print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}")
print(f"Wrote {metrics_json_path.relative_to(REPO_ROOT)}")
if args.sync_comms:
comms_dir = REPO_ROOT / "services" / "comms" / "knowledge"
_sync_tree(out_dir, comms_dir)
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {comms_dir.relative_to(REPO_ROOT)}")
if args.sync_atlasbot:
atlasbot_dir = REPO_ROOT / "services" / "atlasbot" / "knowledge"
_sync_tree(out_dir, atlasbot_dir)
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {atlasbot_dir.relative_to(REPO_ROOT)}")
return 0

View File

@ -1,163 +0,0 @@
#!/usr/bin/env bash
# Prepare a node for recovery/replacement: snapshot its labels, taints, and
# scheduled pods into an artifacts directory, emit replayable restore scripts,
# then (optionally) cordon/drain and delete the Node object. Drain escalates
# from a normal drain, to --force, to --disable-eviction.
set -euo pipefail
usage() {
  cat <<USAGE
Usage: scripts/node_recover.sh <node-name> [options]
Options:
  --yes Skip confirmation prompt
  --skip-drain Do not cordon/drain; only capture recovery artifacts
  --delete-node Delete Node object after drain (for hard-dead node replacement)
  --out-dir <dir> Recovery artifact directory (default: ./artifacts/node-recovery)
  -h, --help Show this help
USAGE
}
# Hard prerequisites for both artifact capture and the jq-based restore scripts.
if ! command -v kubectl >/dev/null 2>&1; then
  echo "kubectl is required" >&2
  exit 1
fi
if ! command -v jq >/dev/null 2>&1; then
  echo "jq is required" >&2
  exit 1
fi
if [ "$#" -lt 1 ]; then
  usage
  exit 1
fi
node=""
assume_yes="false"
skip_drain="false"
delete_node="false"
out_dir="./artifacts/node-recovery"
# First bare argument is the node name; anything after it is an error.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --yes)
      assume_yes="true"
      shift
      ;;
    --skip-drain)
      skip_drain="true"
      shift
      ;;
    --delete-node)
      delete_node="true"
      shift
      ;;
    --out-dir)
      out_dir="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -*)
      echo "Unknown option: $1" >&2
      usage
      exit 1
      ;;
    *)
      if [ -z "${node}" ]; then
        node="$1"
      else
        echo "Unexpected argument: $1" >&2
        usage
        exit 1
      fi
      shift
      ;;
  esac
done
if [ -z "${node}" ]; then
  echo "Node name is required" >&2
  usage
  exit 1
fi
if ! kubectl get node "${node}" >/dev/null 2>&1; then
  echo "Node ${node} not found in cluster API" >&2
  exit 1
fi
# Destructive-operation guard: operator must retype the exact node name.
if [ "${assume_yes}" != "true" ]; then
  echo "About to prepare recovery workflow for node: ${node}"
  echo "skip_drain=${skip_drain} delete_node=${delete_node}"
  read -r -p "Type the node name to continue: " confirm
  if [ "${confirm}" != "${node}" ]; then
    echo "Confirmation did not match node name; aborting."
    exit 1
  fi
fi
# Timestamped artifact directory so repeated runs never clobber each other.
timestamp="$(date +%Y%m%d-%H%M%S)"
artifacts_dir="${out_dir}/${node}-${timestamp}"
mkdir -p "${artifacts_dir}"
echo "Saving node and workload artifacts to ${artifacts_dir}"
kubectl get node "${node}" -o json > "${artifacts_dir}/node.json"
kubectl get node "${node}" --show-labels > "${artifacts_dir}/node.txt"
kubectl get pods -A --field-selector "spec.nodeName=${node}" -o wide > "${artifacts_dir}/pods-on-node.txt"
# Generate a restore script for operator-applied labels only: hostname,
# instance-type, and the kubernetes.io/beta/node namespaces are node-identity
# labels managed by kubelet and must not be replayed onto a replacement.
jq -r '
.metadata.labels
| to_entries[]
| select(
.key != "kubernetes.io/hostname"
and .key != "beta.kubernetes.io/hostname"
and .key != "node.kubernetes.io/instance-type"
and .key != "beta.kubernetes.io/instance-type"
and (.key | startswith("kubernetes.io/") | not)
and (.key | startswith("beta.kubernetes.io/") | not)
and (.key | startswith("node.kubernetes.io/") | not)
)
| "kubectl label node <replacement-node> " + .key + "=" + .value + " --overwrite"
' "${artifacts_dir}/node.json" > "${artifacts_dir}/restore-labels.sh"
# Generate a matching taint-restore script; taint values are optional so the
# "=value" part is emitted only when present.
jq -r '
(.spec.taints // [])[]
| "kubectl taint node <replacement-node> "
+ .key
+ (if .value then "=" + .value else "" end)
+ ":"
+ .effect
+ " --overwrite"
' "${artifacts_dir}/node.json" > "${artifacts_dir}/restore-taints.sh"
chmod +x "${artifacts_dir}/restore-labels.sh" "${artifacts_dir}/restore-taints.sh"
if [ "${skip_drain}" != "true" ]; then
  echo "Cordoning ${node}"
  # Cordon is best-effort: a failure here should not block the drain attempt.
  kubectl cordon "${node}" || true
  echo "Draining ${node}"
  # Escalating drain: normal -> --force (unmanaged pods) -> --disable-eviction
  # (bypass PodDisruptionBudgets). The final attempt is allowed to fail the
  # script under set -e.
  if ! kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m; then
    echo "Standard drain failed; retrying with --force"
    if ! kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m --force; then
      echo "Force drain failed; retrying with --disable-eviction"
      kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m --force --disable-eviction
    fi
  fi
fi
if [ "${delete_node}" = "true" ]; then
  echo "Deleting node object ${node}"
  kubectl delete node "${node}" || true
fi
cat <<NEXT
Recovery prep complete for ${node}.
Artifacts: ${artifacts_dir}
Next steps:
1) Reimage/reprovision replacement host.
2) Rejoin k3s and wait for node Ready.
3) Reapply labels: ${artifacts_dir}/restore-labels.sh
4) Reapply taints: ${artifacts_dir}/restore-taints.sh
5) Validate pods and uncordon replacement when ready.
NEXT

View File

@ -4,21 +4,13 @@ import pathlib
def load_module():
path = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
spec = importlib.util.spec_from_file_location("scripts.dashboards_render_atlas", path)
spec = importlib.util.spec_from_file_location("dashboards_render_atlas", path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
return module
def flatten_panels(panels):
flat = []
for panel in panels:
flat.append(panel)
flat.extend(panel.get("panels", []))
return flat
def test_table_panel_options_and_filterable():
mod = load_module()
panel = mod.table_panel(
@ -50,18 +42,6 @@ def test_node_filter_and_expr_helpers():
assert "node_memory_MemAvailable_bytes" in mem_expr
def test_overview_availability_panel_uses_recorded_365d_rollup():
mod = load_module()
dashboard = mod.build_overview()
panel = next(panel for panel in flatten_panels(dashboard["panels"]) if panel["id"] == 27)
assert panel["title"] == "Atlas Availability (365d)"
assert panel["targets"][0]["expr"] == 'last_over_time(atlas:availability:ratio_365d{scope="atlas"}[30m])'
assert panel["targets"][0]["instant"] is True
assert "precomputed" in panel["description"]
assert "scrape gaps are ignored" in panel["description"]
def test_render_configmap_writes(tmp_path):
mod = load_module()
mod.DASHBOARD_DIR = tmp_path / "dash"
@ -76,93 +56,3 @@ def test_render_configmap_writes(tmp_path):
content = (tmp_path / "cm.yaml").read_text()
assert "kind: ConfigMap" in content
assert f"{uid}.json" in content
def test_testing_suite_variable_uses_canonical_values_only():
mod = load_module()
variable = mod.testing_suite_variable()
canonical_matcher = "|".join(mod.PLATFORM_TEST_SUITE_NAMES)
legacy_names = {"bstein-home", "data-prepper", "titan-iac", "pegasus-health"}
assert variable["allValue"] == canonical_matcher
assert not any(alias in variable["query"] for alias in legacy_names)
assert not any(alias in variable["allValue"] for alias in legacy_names)
assert [option["value"] for option in variable["options"]] == mod.PLATFORM_TEST_SUITE_NAMES
def test_jobs_dashboard_separates_current_gate_health_from_reliability():
mod = load_module()
dashboard = mod.build_jobs_dashboard()
panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])}
assert "Current Gate Health by Suite" in panels_by_title
assert "Run Reliability by Suite (24h)" in panels_by_title
assert "Run Reliability History by Suite" in panels_by_title
assert "Failures by Suite (24h)" not in panels_by_title
assert "Success Rate by Suite (24h)" not in panels_by_title
current_gate_expr = panels_by_title["Current Gate Health by Suite"]["targets"][0]["expr"]
assert 'check)' in current_gate_expr
assert 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
reliability_panel = panels_by_title["Run Reliability by Suite (24h)"]
reliability_expr = reliability_panel["targets"][0]["expr"]
assert "platform_quality_gate_runs_total" in reliability_expr
assert "> 0" in reliability_expr
assert "- 1" in reliability_expr
assert reliability_panel["fieldConfig"]["defaults"]["mappings"] == [
{"type": "value", "options": {"-1": {"text": "no runs"}}}
]
def test_jobs_dashboard_bar_gauges_use_solid_threshold_colors():
mod = load_module()
dashboard = mod.build_jobs_dashboard()
panels = flatten_panels(dashboard["panels"])
bar_gauges = [panel for panel in panels if panel["type"] == "bargauge"]
assert bar_gauges
assert all(panel["options"]["displayMode"] == "basic" for panel in bar_gauges)
assert all(
panel["fieldConfig"]["defaults"]["color"]["mode"] == "thresholds"
for panel in bar_gauges
)
reliability_panel = next(
panel for panel in panels if panel["title"] == "Run Reliability by Suite (24h)"
)
threshold_steps = reliability_panel["fieldConfig"]["defaults"]["thresholds"]["steps"]
assert {"color": "yellow", "value": 93} in threshold_steps
assert {"color": "blue", "value": 100} in threshold_steps
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
mod = load_module()
dashboard = mod.build_jobs_dashboard()
panels = dashboard["panels"]
rows = [panel for panel in panels if panel["type"] == "row"]
visible_query_panels = [panel for panel in panels if panel["type"] != "row"]
nested_panels_by_title = {
child["title"]: child
for row in rows
for child in row.get("panels", [])
}
assert len(panels) == 16
assert len(visible_query_panels) == 11
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11
assert [row["title"] for row in rows] == [
"Reliability And Run History",
"Failure Trends By Check",
"Success Trends By Check",
"Test Drilldowns And Problem Tests",
"Telemetry Completeness, SonarQube, And Branches",
]
assert all(row["collapsed"] for row in rows)
assert "Failure Trend: Coverage" in nested_panels_by_title
assert "Success Trend: Supply Chain" in nested_panels_by_title
assert "Selected Test Pass Rate History" in nested_panels_by_title
assert "Missing Coverage Metrics by Suite" in nested_panels_by_title
assert "SonarQube API Up" in nested_panels_by_title

View File

@ -1,7 +1,5 @@
import importlib.util
import pathlib
import sys
import types
import pytest
@ -22,26 +20,6 @@ def load_sync_module(monkeypatch):
}
for k, v in env.items():
monkeypatch.setenv(k, v)
fake_psycopg2 = types.ModuleType("psycopg2")
fake_psycopg2.Error = Exception
fake_psycopg2.connect = lambda **kwargs: None
fake_psycopg2_extras = types.ModuleType("psycopg2.extras")
fake_psycopg2_extras.RealDictCursor = object
fake_passlib = types.ModuleType("passlib")
fake_passlib_hash = types.ModuleType("passlib.hash")
class _FakeBcryptSha256:
@staticmethod
def hash(password):
return f"stub:{password}"
fake_passlib_hash.bcrypt_sha256 = _FakeBcryptSha256
fake_passlib.hash = fake_passlib_hash
monkeypatch.setitem(sys.modules, "psycopg2", fake_psycopg2)
monkeypatch.setitem(sys.modules, "psycopg2.extras", fake_psycopg2_extras)
monkeypatch.setitem(sys.modules, "passlib", fake_passlib)
monkeypatch.setitem(sys.modules, "passlib.hash", fake_passlib_hash)
module_path = (
pathlib.Path(__file__).resolve().parents[2]
/ "services"
@ -138,100 +116,6 @@ def test_kc_get_users_paginates(monkeypatch):
assert sync.SESSION.calls == 1
def test_kc_get_users_fetches_second_page_after_full_batch(monkeypatch):
    """A full first page (200 users) triggers a second paginated request."""
    sync = load_sync_module(monkeypatch)

    class _PagedSession:
        # Fake HTTP session: first call yields a full batch, second a single user.
        def __init__(self):
            self.calls = 0
            self.first_params = []

        def get(self, *_, **kwargs):
            self.calls += 1
            self.first_params.append(kwargs["params"]["first"])
            if self.calls == 1:
                return _FakeResponse([{"id": f"u{i}"} for i in range(200)])
            return _FakeResponse([{"id": "last"}])

    sync.SESSION = _PagedSession()
    fetched = sync.kc_get_users("tok")
    assert len(fetched) == 201
    assert sync.SESSION.first_params == [0, 200]
def test_get_kc_token_posts_client_credentials(monkeypatch):
    """Token retrieval posts a client_credentials grant and returns the token."""
    sync = load_sync_module(monkeypatch)
    recorded = []

    class _TokenSession:
        def post(self, url, data, timeout):
            recorded.append((url, data, timeout))
            return _FakeResponse({"access_token": "tok"})

    sync.SESSION = _TokenSession()
    token = sync.get_kc_token()
    assert token == "tok"
    assert recorded[0][1]["grant_type"] == "client_credentials"
def test_retry_request_retries_then_succeeds(monkeypatch):
    """One transient RequestException is retried after a 2-second backoff."""
    sync = load_sync_module(monkeypatch)
    call_log = []
    sleep_log = []

    def _flaky():
        call_log.append(1)
        if len(call_log) == 1:
            raise sync.requests.RequestException("temporary")
        return "ok"

    monkeypatch.setattr(sync.time, "sleep", sleep_log.append)
    assert sync.retry_request("request", _flaky, attempts=2) == "ok"
    assert sleep_log == [2]
def test_retry_request_reraises_final_error(monkeypatch):
    """When every attempt fails, the final RequestException propagates."""
    sync = load_sync_module(monkeypatch)
    monkeypatch.setattr(sync.time, "sleep", lambda _seconds: None)

    def _always_fail():
        raise sync.requests.RequestException("nope")

    with pytest.raises(sync.requests.RequestException):
        sync.retry_request("request", _always_fail, attempts=1)
def test_retry_db_connect_retries_then_succeeds(monkeypatch):
    """A single psycopg2 error is retried after a 2-second backoff."""
    sync = load_sync_module(monkeypatch)
    connect_calls = []
    sleep_log = []

    def _connect(**kwargs):
        connect_calls.append(kwargs)
        if len(connect_calls) == 1:
            raise sync.psycopg2.Error("not yet")
        return "conn"

    monkeypatch.setattr(sync.psycopg2, "connect", _connect)
    monkeypatch.setattr(sync.time, "sleep", sleep_log.append)
    assert sync.retry_db_connect(attempts=2) == "conn"
    assert sleep_log == [2]
def test_retry_db_connect_reraises_final_error(monkeypatch):
    """When the database never comes up, the final psycopg2.Error propagates."""
    sync = load_sync_module(monkeypatch)

    def _always_down(**_kwargs):
        raise sync.psycopg2.Error("down")

    monkeypatch.setattr(sync.psycopg2, "connect", _always_down)
    monkeypatch.setattr(sync.time, "sleep", lambda _seconds: None)
    with pytest.raises(sync.psycopg2.Error):
        sync.retry_db_connect(attempts=1)
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
sync = load_sync_module(monkeypatch)
executed = []
@ -260,87 +144,6 @@ def test_ensure_mailu_user_upserts(monkeypatch):
assert captured["password"] != "pw"
def test_attribute_and_email_helpers(monkeypatch):
    """Attribute extraction, mailu enablement, and email resolution helpers."""
    sync = load_sync_module(monkeypatch)

    # First element of a list wins; empty lists resolve to None; scalars pass through.
    assert sync.get_attribute_value({"x": ["first", "second"]}, "x") == "first"
    assert sync.get_attribute_value({"x": []}, "x") is None
    assert sync.get_attribute_value({"x": "value"}, "x") == "value"

    assert sync.mailu_enabled({"mailu_email": ["legacy@example.com"]}) is True
    assert sync.mailu_enabled({"mailu_enabled": ["off"]}) is False

    local_user = {"username": "fallback", "email": "user@example.com"}
    foreign_user = {"username": "fallback", "email": "user@other.com"}
    # Foreign-domain addresses fall back to username@<local domain>.
    assert sync.resolve_mailu_email(local_user, {}) == "user@example.com"
    assert sync.resolve_mailu_email(foreign_user, {}) == "fallback@example.com"
def test_safe_update_payload_filters_fields(monkeypatch):
    """Only whitelisted fields survive; malformed values are sanitized."""
    sync = load_sync_module(monkeypatch)

    raw = {
        "username": "user",
        "enabled": True,
        "email": "user@example.com",
        "emailVerified": False,
        "firstName": "User",
        "lastName": "Example",
        "requiredActions": ["UPDATE_PASSWORD", 7],
        "attributes": "not-a-dict",
        "ignored": "value",
    }
    expected = {
        "username": "user",
        "enabled": True,
        "email": "user@example.com",
        "emailVerified": False,
        "firstName": "User",
        "lastName": "Example",
        # Non-string entries are dropped; non-dict attributes collapse to {}.
        "requiredActions": ["UPDATE_PASSWORD"],
        "attributes": {},
    }
    assert sync._safe_update_payload(raw) == expected
def test_ensure_system_mailboxes_handles_configurations(monkeypatch, capsys):
    """A missing password only warns; otherwise each mailbox is ensured and
    per-mailbox failures are logged without aborting the batch."""
    sync = load_sync_module(monkeypatch)
    ensured = []
    monkeypatch.setattr(
        sync, "MAILU_SYSTEM_USERS", ["postmaster@example.com", "abuse"]
    )

    # Without a system password the helper only prints a warning.
    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "")
    sync.ensure_system_mailboxes(object())
    assert "MAILU_SYSTEM_PASSWORD is missing" in capsys.readouterr().out

    def _ensure(cursor, email, password, display_name):
        ensured.append((email, password, display_name))
        if email == "abuse":
            raise RuntimeError("boom")

    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "pw")
    monkeypatch.setattr(sync, "ensure_mailu_user", _ensure)
    sync.ensure_system_mailboxes(object())

    captured = capsys.readouterr().out
    assert ensured == [
        ("postmaster@example.com", "pw", "postmaster"),
        ("abuse", "pw", "abuse"),
    ]
    assert "Ensured system mailbox for postmaster@example.com" in captured
    assert "Failed to ensure system mailbox abuse" in captured
def test_main_exits_without_users_or_system_mailboxes(monkeypatch, capsys):
    """main() exits early and says so when Keycloak returns no users."""
    sync = load_sync_module(monkeypatch)
    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", [])
    monkeypatch.setattr(sync, "get_kc_token", lambda: "tok")
    monkeypatch.setattr(sync, "kc_get_users", lambda token: [])

    sync.main()

    assert "No users found; exiting." in capsys.readouterr().out
def test_main_generates_password_and_upserts(monkeypatch):
sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")

View File

@ -1,134 +0,0 @@
import importlib.util
import io
import pathlib
import types
def load_listener_module(monkeypatch):
    """Import the mailu sync listener script as an isolated test module.

    Sets a zero wait timeout so tests never block on sync completion.
    """
    monkeypatch.setenv("MAILU_SYNC_WAIT_TIMEOUT_SEC", "0")
    script_path = (
        pathlib.Path(__file__).resolve().parents[2]
        / "services"
        / "mailu"
        / "scripts"
        / "mailu_sync_listener.py"
    )
    spec = importlib.util.spec_from_file_location(
        "mailu_sync_listener_testmod", script_path
    )
    loaded = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(loaded)
    return loaded
def _handler_for(listener, body):
    """Build a bare Handler wired to a fake request body.

    Bypasses __init__ (which would try to serve a real socket) and replaces
    the response-writing methods with recorders the tests can assert on.
    """
    handler = listener.Handler.__new__(listener.Handler)
    payload = body.encode() if isinstance(body, str) else body
    handler.headers = {"Content-Length": str(len(payload))}
    handler.rfile = io.BytesIO(payload)
    handler.responses = []
    handler.headers_ended = 0
    handler.send_response = lambda code: handler.responses.append(code)
    handler.end_headers = lambda: setattr(
        handler, "headers_ended", handler.headers_ended + 1
    )
    return handler
def test_listener_run_sync_blocking_updates_state(monkeypatch):
    """_run_sync_blocking records the rc and timestamp and signals completion."""
    listener = load_listener_module(monkeypatch)
    monkeypatch.setattr(listener, "time", lambda: 42.0)
    monkeypatch.setattr(
        listener.subprocess,
        "run",
        lambda command, check: types.SimpleNamespace(returncode=3),
    )

    assert listener._run_sync_blocking() == 3
    assert listener.last_rc == 3
    assert listener.last_run == 42.0
    assert listener.sync_done.is_set()

    # While a sync is flagged as running, another call returns 0
    # (presumably a no-op guard — confirmed only by the asserted value).
    listener.sync_running = True
    assert listener._run_sync_blocking() == 0
def test_listener_trigger_sync_async_honors_running_and_debounce(monkeypatch):
    """Triggering is refused while a sync runs or inside the debounce window;
    force=True bypasses the debounce and starts a daemon thread."""
    listener = load_listener_module(monkeypatch)
    started = []

    class _RecordingThread:
        def __init__(self, target, daemon):
            self.target = target
            self.daemon = daemon

        def start(self):
            started.append((self.target, self.daemon))

    monkeypatch.setattr(listener.threading, "Thread", _RecordingThread)
    monkeypatch.setattr(listener, "time", lambda: 100.0)

    listener.sync_running = True
    assert listener._trigger_sync_async() is False

    listener.sync_running = False
    listener.last_run = 95.0
    assert listener._trigger_sync_async() is False

    assert listener._trigger_sync_async(force=True) is True
    assert started and started[0][1] is True
def test_listener_post_rejects_invalid_json(monkeypatch):
    """A malformed JSON body yields a single 400 response."""
    listener = load_listener_module(monkeypatch)
    handler = _handler_for(listener, b"{not-json")

    handler.do_POST()

    assert handler.responses == [400]
    assert handler.headers_ended == 1
def test_listener_post_triggers_async_without_wait(monkeypatch):
    """A non-waiting POST fires an async sync and answers 202 Accepted."""
    listener = load_listener_module(monkeypatch)
    forwarded = []
    monkeypatch.setattr(
        listener,
        "_trigger_sync_async",
        lambda force=False: forwarded.append(force) or True,
    )

    handler = _handler_for(listener, '{"force": true}')
    handler.do_POST()

    assert forwarded == [True]
    assert handler.responses == [202]
def test_listener_post_wait_returns_success_or_failure(monkeypatch):
    """Waiting POSTs map the last sync rc to 200 (ok) or 500 (failure)."""
    listener = load_listener_module(monkeypatch)
    forwarded = []
    monkeypatch.setattr(
        listener,
        "_trigger_sync_async",
        lambda force=False: forwarded.append(force) or True,
    )
    listener.sync_running = False

    listener.last_rc = 0
    ok_handler = _handler_for(listener, '{"wait": true, "force": true}')
    ok_handler.do_POST()
    assert forwarded == [True]
    assert ok_handler.responses == [200]

    listener.last_rc = 2
    failing_handler = _handler_for(listener, '{"wait": true}')
    failing_handler.do_POST()
    assert failing_handler.responses == [500]
def test_listener_post_wait_keeps_running_request_successful(monkeypatch):
    """A waiting POST made while a sync is still in flight reports 200."""
    listener = load_listener_module(monkeypatch)
    listener.sync_running = True

    handler = _handler_for(listener, '{"wait": true}')
    handler.do_POST()

    assert handler.responses == [200]
def test_listener_log_message_is_quiet(monkeypatch):
    """Handler.log_message is silenced (returns None, emits nothing)."""
    listener = load_listener_module(monkeypatch)
    handler = listener.Handler.__new__(listener.Handler)
    assert handler.log_message("ignored %s", "value") is None

View File

@ -1,73 +0,0 @@
#!/usr/bin/env bash
# Verify the Ariadne Jenkins-workspace-cleanup rollout end to end:
# reconcile Flux, wait for the deployment, check env wiring, scrape
# /metrics through a port-forward, and surface recent cleanup activity.
#
# Usage: $0 [dry-run|active]   (default: dry-run)
# Env overrides: KUSTOMIZATION, NAMESPACE, DEPLOYMENT, LOCAL_METRICS_PORT
set -euo pipefail

MODE="${1:-dry-run}"
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
  echo "usage: $0 [dry-run|active]" >&2
  exit 2
fi

# Map the CLI mode onto the expected env value and Prometheus mode label.
EXPECTED_DRY_RUN="true"
PROM_MODE="dry_run"
if [[ "$MODE" == "active" ]]; then
  EXPECTED_DRY_RUN="false"
  PROM_MODE="delete"
fi

KUSTOMIZATION="${KUSTOMIZATION:-maintenance}"
NAMESPACE="${NAMESPACE:-maintenance}"
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"

for cmd in flux kubectl curl grep awk; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "missing required command: $cmd" >&2
    exit 2
  fi
done

echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source

echo "[2/5] wait for deployment rollout"
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m

echo "[3/5] verify ariadne env wiring"
# Each grep aborts the script (set -e) if an expected env var is missing.
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"
echo "$ENV_DUMP" | grep -F "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_NAMESPACE=jenkins"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"

echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
PF_LOG="$(mktemp)"
METRICS_FILE="$(mktemp)"
cleanup() {
  if [[ -n "${PF_PID:-}" ]]; then
    kill "$PF_PID" >/dev/null 2>&1 || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  rm -f -- "$PF_LOG" "$METRICS_FILE"
}
trap cleanup EXIT

kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" "${LOCAL_METRICS_PORT}:8080" >"$PF_LOG" 2>&1 &
PF_PID=$!

# Poll instead of a fixed sleep: port-forward readiness is not instant and a
# single blind 2s wait raced it on slow nodes.
attempt=0
until curl -fsS "http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics" >"$METRICS_FILE" 2>/dev/null; do
  attempt=$((attempt + 1))
  if [[ "$attempt" -ge 15 ]]; then
    echo "metrics endpoint never became reachable on port ${LOCAL_METRICS_PORT}" >&2
    exit 1
  fi
  sleep 1
done

grep -F "# HELP ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"
grep -F "# HELP ariadne_jenkins_workspace_cleanup_objects_total" "$METRICS_FILE"

echo "[5/5] show recent cleanup signal"
# Look for an actual sample line (not the "# HELP"/"# TYPE" comments) carrying
# the expected mode label. The previous version tested only that the metric
# name appeared anywhere in the file, which the HELP check above had already
# guaranteed — its "no sample yet" branch was unreachable.
if grep -- "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE" \
    | grep -v '^#' | grep -q "mode=\"${PROM_MODE}\""; then
  grep -- "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE" \
    | grep -- "mode=\"${PROM_MODE}\"" || true
else
  echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
fi

echo "Recent cleanup logs (if any):"
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 | grep -i "jenkins workspace cleanup" | tail -n 20 || true
echo "verification complete for mode=${MODE}"

View File

@ -5,7 +5,7 @@ metadata:
name: ollama
namespace: ai
spec:
replicas: 0
replicas: 1
revisionHistoryLimit: 2
strategy:
type: RollingUpdate
@ -21,7 +21,7 @@ spec:
app: ollama
annotations:
ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
ai.bstein.dev/gpu: GPU pool (titan-20/21)
ai.bstein.dev/gpu: GPU pool (titan-22/24)
ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
spec:
affinity:
@ -32,13 +32,13 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- titan-20
- titan-21
- titan-22
- titan-24
runtimeClassName: nvidia
volumes:
- name: models
persistentVolumeClaim:
claimName: ollama-models-asteria
claimName: ollama-models
initContainers:
- name: warm-model
image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d

View File

@ -2,12 +2,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ollama-models-asteria
name: ollama-models
namespace: ai
spec:
accessModes:
- ReadWriteMany
- ReadWriteOnce
resources:
requests:
storage: 30Gi
storageClassName: asteria
storageClassName: astreae

View File

@ -3,7 +3,7 @@ apiVersion: apps/v1
kind: Deployment
metadata:
name: atlasbot
namespace: comms
namespace: ai
labels:
app: atlasbot
spec:
@ -16,9 +16,9 @@ spec:
labels:
app: atlasbot
annotations:
checksum/atlasbot-configmap: manual-atlasbot-103
checksum/atlasbot-configmap: manual-atlasbot-101
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/role: "ai"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
@ -67,17 +67,17 @@ spec:
hardware: rpi5
containers:
- name: atlasbot
image: python:3.11-slim
image: registry.bstein.dev/bstein/atlasbot:0.1.0-55
command: ["/bin/sh","-c"]
args:
- |
. /vault/scripts/comms_vault_env.sh
exec python /app/bot.py
. /vault/scripts/atlasbot_vault_env.sh
exec python -m atlasbot.main
env:
- name: MATRIX_BASE
value: http://othrys-synapse-matrix-synapse:8008
value: http://othrys-synapse-matrix-synapse.comms.svc.cluster.local:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
value: http://matrix-authentication-service.comms.svc.cluster.local:8080
- name: KB_DIR
value: /kb
- name: VM_URL
@ -93,7 +93,7 @@ spec:
- name: BOT_USER_GENIUS
value: atlas-genius
- name: BOT_MENTIONS
value: atlas-quick,atlas-smart,atlas-genius,atlas_quick,atlas_smart,atlas_genius
value: atlas-quick,atlas-smart,atlas-genius
- name: OLLAMA_URL
value: http://ollama.ai.svc.cluster.local:11434
- name: OLLAMA_MODEL
@ -104,26 +104,50 @@ spec:
value: qwen2.5:14b-instruct-q4_0
- name: ATLASBOT_MODEL_GENIUS
value: qwen2.5:14b-instruct-q4_0
- name: ATLASBOT_MODEL_DEEP
value: qwen2.5:14b-instruct-q4_0
- name: OLLAMA_FALLBACK_MODEL
value: qwen2.5:14b-instruct-q4_0
- name: OLLAMA_TIMEOUT_SEC
value: "600"
- name: OLLAMA_RETRIES
value: "0"
- name: ATLASBOT_THINKING_INTERVAL_SEC
value: "30"
- name: ATLASBOT_QUICK_TIME_BUDGET_SEC
value: "15"
- name: ATLASBOT_SMART_TIME_BUDGET_SEC
value: "45"
- name: ATLASBOT_GENIUS_TIME_BUDGET_SEC
value: "180"
- name: ATLASBOT_OLLAMA_RETRIES
value: "0"
- name: ATLASBOT_THINKING_INTERVAL_SEC
value: "30"
- name: ATLASBOT_SNAPSHOT_TTL_SEC
value: "30"
- name: ATLASBOT_HTTP_PORT
value: "8090"
- name: ATLASBOT_STATE_DB
value: /data/atlasbot_state.db
- name: ATLASBOT_QUEUE_ENABLED
value: "false"
- name: ATLASBOT_DEBUG_PIPELINE
value: "true"
- name: ATLASBOT_NATS_URL
value: nats://nats.nats.svc.cluster.local:4222
- name: ATLASBOT_NATS_STREAM
value: atlasbot
- name: ATLASBOT_NATS_SUBJECT
value: atlasbot.requests
- name: ATLASBOT_FAST_MAX_ANGLES
value: "2"
- name: ATLASBOT_SMART_MAX_ANGLES
value: "5"
- name: ATLASBOT_FAST_MAX_CANDIDATES
value: "2"
- name: ATLASBOT_SMART_MAX_CANDIDATES
value: "6"
- name: ATLASBOT_FAST_LLM_CALLS_MAX
value: "8"
- name: ATLASBOT_SMART_LLM_CALLS_MAX
value: "24"
- name: ATLASBOT_GENIUS_LLM_CALLS_MAX
value: "72"
ports:
- name: http
containerPort: 8090
@ -135,19 +159,15 @@ spec:
cpu: 500m
memory: 512Mi
volumeMounts:
- name: code
mountPath: /app/bot.py
subPath: bot.py
- name: kb
mountPath: /kb
readOnly: true
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
- name: atlasbot-state
mountPath: /data
volumes:
- name: code
configMap:
name: atlasbot
- name: kb
configMap:
name: atlas-kb
@ -166,5 +186,7 @@ spec:
path: diagrams/atlas-http.mmd
- name: vault-scripts
configMap:
name: comms-vault-env
name: atlasbot-vault-env
defaultMode: 0555
- name: atlasbot-state
emptyDir: {}

View File

@ -3,7 +3,9 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: atlasbot
namespace: comms
namespace: ai
imagePullSecrets:
- name: harbor-regcred
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
@ -43,5 +45,4 @@ roleRef:
subjects:
- kind: ServiceAccount
name: atlasbot
namespace: comms
namespace: ai

View File

@ -2,7 +2,7 @@ apiVersion: v1
kind: Service
metadata:
name: atlasbot
namespace: comms
namespace: ai
labels:
app: atlasbot
spec:

View File

@ -0,0 +1,26 @@
# services/atlasbot/image-automation.yaml
apiVersion: image.toolkit.fluxcd.io/v1
kind: ImageUpdateAutomation
metadata:
name: atlasbot
namespace: ai
spec:
interval: 1m0s
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
git:
checkout:
ref:
branch: feature/atlasbot
commit:
author:
name: flux-bot
email: ops@bstein.dev
messageTemplate: "chore(atlasbot): automated image update"
push:
branch: feature/atlasbot
update:
path: services/atlasbot
strategy: Setters

View File

@ -0,0 +1,23 @@
# services/atlasbot/image.yaml
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImageRepository
metadata:
name: atlasbot
namespace: ai
spec:
image: registry.bstein.dev/bstein/atlasbot
interval: 1m0s
secretRef:
name: harbor-regcred
---
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImagePolicy
metadata:
name: atlasbot
namespace: ai
spec:
imageRepositoryRef:
name: atlasbot
policy:
semver:
range: ">=0.1.0-0"

View File

@ -0,0 +1,22 @@
Atlas Knowledge Base (KB)
This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be:
- Accurate (grounded in GitOps + read-only cluster tools)
- Maintainable (small docs + deterministic generators)
- Safe (no secrets; refer to Secret/Vault paths by name only)
Layout
- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown).
- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON).
- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog.
Regeneration
- Update manifests/docs, then regenerate generated artifacts:
- `python scripts/knowledge_render_atlas.py --write`
Authoring rules
- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`.
- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths.
- Keep each runbook small; one topic per file; use headings.
- When in doubt, link to the exact file path in this repo that configures the behavior.

View File

@ -0,0 +1,8 @@
{
"counts": {
"helmrelease_host_hints": 19,
"http_endpoints": 45,
"services": 47,
"workloads": 74
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,234 @@
flowchart LR
host_auth_bstein_dev["auth.bstein.dev"]
svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"]
host_auth_bstein_dev --> svc_sso_oauth2_proxy
wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"]
svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy
host_bstein_dev["bstein.dev"]
svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"]
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend
wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"]
svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend
svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"]
host_bstein_dev --> svc_comms_matrix_wellknown
wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"]
svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown
svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"]
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
host_budget_bstein_dev["budget.bstein.dev"]
svc_finance_actual_budget["finance/actual-budget (Service)"]
host_budget_bstein_dev --> svc_finance_actual_budget
wl_finance_actual_budget["finance/actual-budget (Deployment)"]
svc_finance_actual_budget --> wl_finance_actual_budget
host_call_live_bstein_dev["call.live.bstein.dev"]
svc_comms_element_call["comms/element-call (Service)"]
host_call_live_bstein_dev --> svc_comms_element_call
wl_comms_element_call["comms/element-call (Deployment)"]
svc_comms_element_call --> wl_comms_element_call
host_chat_ai_bstein_dev["chat.ai.bstein.dev"]
svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"]
host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway
wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"]
svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway
host_ci_bstein_dev["ci.bstein.dev"]
svc_jenkins_jenkins["jenkins/jenkins (Service)"]
host_ci_bstein_dev --> svc_jenkins_jenkins
wl_jenkins_jenkins["jenkins/jenkins (Deployment)"]
svc_jenkins_jenkins --> wl_jenkins_jenkins
host_cloud_bstein_dev["cloud.bstein.dev"]
svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"]
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
host_health_bstein_dev["health.bstein.dev"]
svc_health_wger["health/wger (Service)"]
host_health_bstein_dev --> svc_health_wger
wl_health_wger["health/wger (Deployment)"]
svc_health_wger --> wl_health_wger
host_kit_live_bstein_dev["kit.live.bstein.dev"]
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"]
svc_comms_livekit_token_service --> wl_comms_livekit_token_service
svc_comms_livekit["comms/livekit (Service)"]
host_kit_live_bstein_dev --> svc_comms_livekit
wl_comms_livekit["comms/livekit (Deployment)"]
svc_comms_livekit --> wl_comms_livekit
host_live_bstein_dev["live.bstein.dev"]
host_live_bstein_dev --> svc_comms_matrix_wellknown
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
host_live_bstein_dev --> svc_comms_matrix_guest_register
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
host_live_bstein_dev --> svc_comms_matrix_authentication_service
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
host_logs_bstein_dev["logs.bstein.dev"]
svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"]
host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs
wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"]
svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs
host_longhorn_bstein_dev["longhorn.bstein.dev"]
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"]
svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn
host_mail_bstein_dev["mail.bstein.dev"]
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
host_monero_bstein_dev["monero.bstein.dev"]
svc_crypto_monerod["crypto/monerod (Service)"]
host_monero_bstein_dev --> svc_crypto_monerod
wl_crypto_monerod["crypto/monerod (Deployment)"]
svc_crypto_monerod --> wl_crypto_monerod
host_money_bstein_dev["money.bstein.dev"]
svc_finance_firefly["finance/firefly (Service)"]
host_money_bstein_dev --> svc_finance_firefly
wl_finance_firefly["finance/firefly (Deployment)"]
svc_finance_firefly --> wl_finance_firefly
host_notes_bstein_dev["notes.bstein.dev"]
svc_outline_outline["outline/outline (Service)"]
host_notes_bstein_dev --> svc_outline_outline
wl_outline_outline["outline/outline (Deployment)"]
svc_outline_outline --> wl_outline_outline
host_office_bstein_dev["office.bstein.dev"]
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
host_office_bstein_dev --> svc_nextcloud_collabora
wl_nextcloud_collabora["nextcloud/collabora (Deployment)"]
svc_nextcloud_collabora --> wl_nextcloud_collabora
host_pegasus_bstein_dev["pegasus.bstein.dev"]
svc_jellyfin_pegasus["jellyfin/pegasus (Service)"]
host_pegasus_bstein_dev --> svc_jellyfin_pegasus
wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"]
svc_jellyfin_pegasus --> wl_jellyfin_pegasus
host_scm_bstein_dev["scm.bstein.dev"]
svc_gitea_gitea["gitea/gitea (Service)"]
host_scm_bstein_dev --> svc_gitea_gitea
wl_gitea_gitea["gitea/gitea (Deployment)"]
svc_gitea_gitea --> wl_gitea_gitea
host_secret_bstein_dev["secret.bstein.dev"]
svc_vault_vault["vault/vault (Service)"]
host_secret_bstein_dev --> svc_vault_vault
wl_vault_vault["vault/vault (StatefulSet)"]
svc_vault_vault --> wl_vault_vault
host_sso_bstein_dev["sso.bstein.dev"]
svc_sso_keycloak["sso/keycloak (Service)"]
host_sso_bstein_dev --> svc_sso_keycloak
wl_sso_keycloak["sso/keycloak (Deployment)"]
svc_sso_keycloak --> wl_sso_keycloak
host_stream_bstein_dev["stream.bstein.dev"]
svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"]
host_stream_bstein_dev --> svc_jellyfin_jellyfin
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
host_tasks_bstein_dev["tasks.bstein.dev"]
svc_planka_planka["planka/planka (Service)"]
host_tasks_bstein_dev --> svc_planka_planka
wl_planka_planka["planka/planka (Deployment)"]
svc_planka_planka --> wl_planka_planka
host_vault_bstein_dev["vault.bstein.dev"]
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"]
svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden
subgraph bstein_dev_home[bstein-dev-home]
svc_bstein_dev_home_bstein_dev_home_frontend
wl_bstein_dev_home_bstein_dev_home_frontend
svc_bstein_dev_home_bstein_dev_home_backend
wl_bstein_dev_home_bstein_dev_home_backend
svc_bstein_dev_home_chat_ai_gateway
wl_bstein_dev_home_chat_ai_gateway
end
subgraph comms[comms]
svc_comms_matrix_wellknown
wl_comms_matrix_wellknown
svc_comms_element_call
wl_comms_element_call
svc_comms_livekit_token_service
wl_comms_livekit_token_service
svc_comms_livekit
wl_comms_livekit
svc_comms_othrys_synapse_matrix_synapse
svc_comms_matrix_guest_register
wl_comms_matrix_guest_register
svc_comms_matrix_authentication_service
wl_comms_matrix_authentication_service
end
subgraph crypto[crypto]
svc_crypto_monerod
wl_crypto_monerod
end
subgraph finance[finance]
svc_finance_actual_budget
wl_finance_actual_budget
svc_finance_firefly
wl_finance_firefly
end
subgraph gitea[gitea]
svc_gitea_gitea
wl_gitea_gitea
end
subgraph health[health]
svc_health_wger
wl_health_wger
end
subgraph jellyfin[jellyfin]
svc_jellyfin_pegasus
wl_jellyfin_pegasus
svc_jellyfin_jellyfin
wl_jellyfin_jellyfin
end
subgraph jenkins[jenkins]
svc_jenkins_jenkins
wl_jenkins_jenkins
end
subgraph logging[logging]
svc_logging_oauth2_proxy_logs
wl_logging_oauth2_proxy_logs
end
subgraph longhorn_system[longhorn-system]
svc_longhorn_system_oauth2_proxy_longhorn
wl_longhorn_system_oauth2_proxy_longhorn
end
subgraph mailu_mailserver[mailu-mailserver]
svc_mailu_mailserver_mailu_front
end
subgraph nextcloud[nextcloud]
svc_nextcloud_nextcloud
wl_nextcloud_nextcloud
svc_nextcloud_collabora
wl_nextcloud_collabora
end
subgraph outline[outline]
svc_outline_outline
wl_outline_outline
end
subgraph planka[planka]
svc_planka_planka
wl_planka_planka
end
subgraph sso[sso]
svc_sso_oauth2_proxy
wl_sso_oauth2_proxy
svc_sso_keycloak
wl_sso_keycloak
end
subgraph vault[vault]
svc_vault_vault
wl_vault_vault
end
subgraph vaultwarden[vaultwarden]
svc_vaultwarden_vaultwarden_service
wl_vaultwarden_vaultwarden
end

View File

@ -0,0 +1,29 @@
# services/atlasbot/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai
resources:
- atlasbot-deployment.yaml
- atlasbot-service.yaml
- atlasbot-rbac.yaml
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- image.yaml
- image-automation.yaml
images:
- name: registry.bstein.dev/bstein/atlasbot
newTag: 0.1.2-106 # {"$imagepolicy": "ai:atlasbot:tag"}
configMapGenerator:
- name: atlasbot-vault-env
files:
- atlasbot_vault_env.sh=scripts/atlasbot_vault_env.sh
options:
disableNameSuffixHash: true
- name: atlas-kb
files:
- INDEX.md=knowledge/INDEX.md
- atlas.json=knowledge/catalog/atlas.json
- atlas-summary.json=knowledge/catalog/atlas-summary.json
- metrics.json=knowledge/catalog/metrics.json
- runbooks.json=knowledge/catalog/runbooks.json
- atlas-http.mmd=knowledge/diagrams/atlas-http.mmd

View File

@ -0,0 +1,44 @@
#!/usr/bin/env sh
# Export atlasbot runtime secrets from Vault-agent-injected files under
# /vault/secrets. Sourced by the container entrypoint before the bot starts.
set -eu

vault_dir="/vault/secrets"

# Print a required secret, stripping CR/LF. A missing file makes tr fail,
# which aborts the sourcing shell under `set -e`.
read_secret() {
  tr -d '\r\n' < "${vault_dir}/$1"
}

# Print an optional secret, or the empty string when the file is absent.
read_optional() {
  if [ -f "${vault_dir}/$1" ]; then
    tr -d '\r\n' < "${vault_dir}/$1"
  else
    printf ''
  fi
}

# Assign before exporting: `export VAR="$(cmd)"` masks cmd's exit status
# (the export itself succeeds), so `set -e` would NOT abort on a missing
# required secret (ShellCheck SC2155). Plain-expansion exports are safe.
TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)"
export TURN_STATIC_AUTH_SECRET
export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}"

LIVEKIT_API_SECRET="$(read_secret livekit-primary)"
export LIVEKIT_API_SECRET
export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}"

BOT_PASS="$(read_secret bot-pass)"
export BOT_PASS
BOT_PASS_QUICK="$(read_optional bot-quick-pass)"
export BOT_PASS_QUICK
BOT_PASS_SMART="$(read_optional bot-smart-pass)"
BOT_PASS_GENIUS="$(read_optional bot-genius-pass)"
# Tier fallbacks: smart falls back to the base password, genius to smart.
if [ -z "${BOT_PASS_SMART}" ]; then
  BOT_PASS_SMART="${BOT_PASS}"
fi
if [ -z "${BOT_PASS_GENIUS}" ]; then
  BOT_PASS_GENIUS="${BOT_PASS_SMART}"
fi
export BOT_PASS_SMART BOT_PASS_GENIUS

SEEDER_PASS="$(read_secret seeder-pass)"
export SEEDER_PASS
CHAT_API_KEY="$(read_secret chat-matrix)"
export CHAT_API_KEY
CHAT_API_HOMEPAGE="$(read_secret chat-homepage)"
export CHAT_API_HOMEPAGE
# MAS admin secret is consumed as a file path, not a value.
export MAS_ADMIN_CLIENT_SECRET_FILE="${vault_dir}/mas-admin-secret"
PGPASSWORD="$(read_secret synapse-db-pass)"
export PGPASSWORD
MAS_DB_PASSWORD="$(read_secret mas-db-pass)"
export MAS_DB_PASSWORD
MATRIX_SHARED_SECRET="$(read_secret mas-matrix-shared)"
export MATRIX_SHARED_SECRET
KEYCLOAK_CLIENT_SECRET="$(read_secret mas-kc-secret)"
export KEYCLOAK_CLIENT_SECRET

View File

@ -1,14 +1,14 @@
# services/typhon/secretproviderclass.yaml
# services/atlasbot/secretproviderclass.yaml
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
name: typhon-vault
namespace: climate
name: atlasbot-vault
namespace: ai
spec:
provider: vault
parameters:
vaultAddress: "http://vault.vault.svc.cluster.local:8200"
roleName: "typhon"
roleName: "ai"
objects: |
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull"

Some files were not shown because too many files have changed in this diff Show More