scheduling: keep longhorn vault sync off storage nodes

scheduling: keep singleton apps off storage nodes
recovery(atlas): stop post-outage control-plane churn
2026-05-05 13:46:19 -03:00 · 2026-05-05 13:37:04 -03:00 · 2026-05-05 10:42:28 -03:00 · 2026-05-05 06:42:15 -03:00 · 2026-05-05 06:31:09 -03:00 · 2026-05-05 06:19:15 -03:00
111 changed files with 19460 additions and 5002 deletions
--- a/324
+++ b/324
@ -7,14 +7,24 @@ pipeline {
 apiVersion: v1
 kind: Pod
 spec:
  serviceAccountName: "jenkins"
  nodeSelector:
    hardware: rpi5
    kubernetes.io/arch: arm64
    node-role.kubernetes.io/worker: "true"
  containers:
    - name: jnlp
      image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
      resources:
        requests:
          cpu: "25m"
          memory: "256Mi"
    - name: python
-      image: python:3.12-slim
+      image: registry.bstein.dev/bstein/python:3.12-slim
      command:
        - cat
      tty: true
    - name: quality-tools
      image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
      command:
        - cat
      tty: true
@ -24,9 +34,21 @@ spec:
  environment {
    PIP_DISABLE_PIP_VERSION_CHECK = '1'
    PYTHONUNBUFFERED = '1'
-    SUITE_NAME = 'titan-iac'
+    SUITE_NAME = 'titan_iac'
    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
    SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
    SONARQUBE_PROJECT_KEY = 'titan_iac'
    SONARQUBE_TOKEN = credentials('sonarqube-token')
    VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
    QUALITY_GATE_SONARQUBE_ENFORCE = '1'
    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
    QUALITY_GATE_IRONBANK_ENFORCE = '1'
    QUALITY_GATE_IRONBANK_REQUIRED = '0'
    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
  }
  options {
    disableConcurrentBuilds()
    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
  }
  stages {
    stage('Checkout') {
@ -36,7 +58,175 @@ spec:
    }
    stage('Install deps') {
      steps {
-        sh 'pip install --no-cache-dir -r ci/requirements.txt'
+        sh '''
          set -eu
          if ! command -v git >/dev/null 2>&1; then
            apt-get update
            apt-get install -y --no-install-recommends git ca-certificates
            rm -rf /var/lib/apt/lists/*
          fi
          pip install --no-cache-dir -r ci/requirements.txt
        '''
      }
    }
    stage('Prepare local quality evidence') {
      steps {
        sh '''
          set -eu
          mkdir -p build
          set +e
          python3 -m testing.quality_gate --profile local --build-dir build
          local_quality_rc=$?
          set -e
          printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
        '''
      }
    }
    stage('Collect SonarQube evidence') {
      steps {
        container('quality-tools') {
          sh '''#!/usr/bin/env bash
            set -euo pipefail
            mkdir -p build
            args=(
              "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
              "-Dsonar.login=${SONARQUBE_TOKEN}"
              "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.sources=."
              "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
              "-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
            )
            [ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
            set +e
            sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
            rc=${PIPESTATUS[0]}
            set -e
            printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
          '''
        }
        sh '''
          set -eu
          mkdir -p build
          python3 - <<'PY'
 import base64
 import json
 import os
 import time
 import urllib.parse
 import urllib.request
 from pathlib import Path
 host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
 project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
 token = os.getenv('SONARQUBE_TOKEN', '').strip()
 report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
 payload = {
    "status": "ERROR",
    "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
 }
 if host and project_key:
    task_file = Path('.scannerwork/report-task.txt')
    task_id = ''
    if task_file.exists():
        for line in task_file.read_text(encoding='utf-8').splitlines():
            key, _, value = line.partition('=')
            if key == 'ceTaskId':
                task_id = value.strip()
                break
    if task_id:
        ce_query = urllib.parse.urlencode({"id": task_id})
        deadline = time.monotonic() + 180
        while time.monotonic() < deadline:
            ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
            if token:
                encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
                ce_request.add_header("Authorization", f"Basic {encoded}")
            try:
                with urllib.request.urlopen(ce_request, timeout=12) as response:
                    ce_payload = json.loads(response.read().decode("utf-8"))
            except Exception:
                time.sleep(3)
                continue
            status = str(ce_payload.get("task", {}).get("status", "")).upper()
            if status in {"SUCCESS", "FAILED", "CANCELED"}:
                break
            time.sleep(3)
    query = urllib.parse.urlencode({"projectKey": project_key})
    request = urllib.request.Request(
        f"{host}/api/qualitygates/project_status?{query}",
        method="GET",
    )
    if token:
        encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
        request.add_header("Authorization", f"Basic {encoded}")
    try:
        with urllib.request.urlopen(request, timeout=12) as response:
            payload = json.loads(response.read().decode("utf-8"))
    except Exception as exc:  # noqa: BLE001
        payload = {"status": "ERROR", "error": str(exc)}
 with open(report_path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, indent=2, sort_keys=True)
    handle.write("\\n")
 PY
        '''
      }
    }
    stage('Collect IronBank evidence') {
      steps {
        container('quality-tools') {
          sh '''#!/usr/bin/env bash
            set -euo pipefail
            mkdir -p build
            set +e
            trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
            trivy_rc=$?
            set -e
            if [ ! -s build/trivy-fs.json ]; then
              cat > build/ironbank-compliance.json <<EOF
 {"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
 EOF
              exit 0
            fi
          '''
        }
        sh '''
          set -eu
          mkdir -p build
          if [ -s build/trivy-fs.json ]; then
            python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
            exit 0
          fi
          python3 - <<'PY'
 import json
 import os
 from pathlib import Path
 report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
 if report_path.exists():
    raise SystemExit(0)
 status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
 compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
 payload = {
    "status": status or "unknown",
    "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
 }
 payload = {k: v for k, v in payload.items() if v is not None}
 if "status" not in payload:
    payload["status"] = "unknown"
 payload["note"] = (
    "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
    "or write build/ironbank-compliance.json in image-building repos."
 )
 report_path.parent.mkdir(parents=True, exist_ok=True)
 report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
 PY
        '''
      }
    }
    stage('Run quality gate') {
@ -66,8 +256,96 @@ spec:
    stage('Enforce quality gate') {
      steps {
        sh '''
-          set -eu
+          set -euo pipefail
-          test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
+          gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
          fail=0
          if [ "${gate_rc}" -ne 0 ]; then
            echo "quality gate failed with rc=${gate_rc}" >&2
            fail=1
          fi
          enabled() {
            case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
              1|true|yes|on) return 0 ;;
              *) return 1 ;;
            esac
          }
          if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
            sonar_status="$(python3 - <<'PY'
 import json
 from pathlib import Path
 path = Path("build/sonarqube-quality-gate.json")
 if not path.exists():
    print("missing")
    raise SystemExit(0)
 try:
    payload = json.loads(path.read_text(encoding="utf-8"))
 except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
 status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
 print(status or "missing")
 PY
 )"
            case "${sonar_status}" in
              ok|pass|passed|success) ;;
              *)
                echo "sonarqube gate failed: ${sonar_status}" >&2
                fail=1
                ;;
            esac
          fi
          ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
          if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
            ironbank_required=1
          fi
          if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
            supply_status="$(python3 - <<'PY'
 import json
 from pathlib import Path
 path = Path("build/ironbank-compliance.json")
 if not path.exists():
    print("missing")
    raise SystemExit(0)
 try:
    payload = json.loads(path.read_text(encoding="utf-8"))
 except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
 compliant = payload.get("compliant")
 if compliant is True:
    print("ok")
 elif compliant is False:
    print("failed")
 else:
    status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
    print(status or "missing")
 PY
 )"
            case "${supply_status}" in
              ok|pass|passed|success|compliant) ;;
              not_applicable|na|n/a)
                if enabled "${ironbank_required}"; then
                  echo "supply chain gate required but status=${supply_status}" >&2
                  fail=1
                fi
                ;;
              *)
                if enabled "${ironbank_required}"; then
                  echo "supply chain gate failed: ${supply_status}" >&2
                  fail=1
                else
                  echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
                fi
                ;;
            esac
          fi
          exit "${fail}"
        '''
      }
    }
@ -76,7 +354,7 @@ spec:
        script {
          env.FLUX_BRANCH = sh(
            returnStdout: true,
-            script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml'''
+            script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
          ).trim()
          if (!env.FLUX_BRANCH) {
            error('Flux branch not found in gotk-sync.yaml')
@ -93,16 +371,28 @@ spec:
        }
      }
      steps {
-        container('jnlp') {
+        withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
-          withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
+          sh '''
-            sh '''
+            set -euo pipefail
-              set +x
+            if ! command -v git >/dev/null 2>&1; then
-              git config user.email "jenkins@bstein.dev"
+              if command -v apk >/dev/null 2>&1; then
-              git config user.name "jenkins"
+                apk add --no-cache git >/dev/null
-              git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
+              elif command -v apt-get >/dev/null 2>&1; then
-              git push origin HEAD:${FLUX_BRANCH}
+                apt-get update >/dev/null
-            '''
+                apt-get install -y git >/dev/null
-          }
+              fi
            fi
            cd "${WORKSPACE:-$PWD}"
            if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
              echo "workspace is not a git checkout; skipping promote"
              exit 0
            fi
            set +x
            git config user.email "jenkins@bstein.dev"
            git config user.name "jenkins"
            git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
            git push origin HEAD:${FLUX_BRANCH}
          '''
        }
      }
    }
--- a/ci/Jenkinsfile.titan-iac
+++ b/ci/Jenkinsfile.titan-iac
@ -6,14 +6,24 @@ pipeline {
 apiVersion: v1
 kind: Pod
 spec:
  serviceAccountName: "jenkins"
  nodeSelector:
    hardware: rpi5
    kubernetes.io/arch: arm64
    node-role.kubernetes.io/worker: "true"
  containers:
    - name: jnlp
      image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
      resources:
        requests:
          cpu: "25m"
          memory: "256Mi"
    - name: python
-      image: python:3.12-slim
+      image: registry.bstein.dev/bstein/python:3.12-slim
      command:
        - cat
      tty: true
    - name: quality-tools
      image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
      command:
        - cat
      tty: true
@ -23,9 +33,21 @@ spec:
  environment {
    PIP_DISABLE_PIP_VERSION_CHECK = '1'
    PYTHONUNBUFFERED = '1'
-    SUITE_NAME = 'titan-iac'
+    SUITE_NAME = 'titan_iac'
    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
    SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
    SONARQUBE_PROJECT_KEY = 'titan_iac'
    SONARQUBE_TOKEN = credentials('sonarqube-token')
    VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
    QUALITY_GATE_SONARQUBE_ENFORCE = '1'
    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
    QUALITY_GATE_IRONBANK_ENFORCE = '1'
    QUALITY_GATE_IRONBANK_REQUIRED = '0'
    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
  }
  options {
    disableConcurrentBuilds()
    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
  }
  stages {
    stage('Checkout') {
@ -35,7 +57,175 @@ spec:
    }
    stage('Install deps') {
      steps {
-        sh 'pip install --no-cache-dir -r ci/requirements.txt'
+        sh '''
          set -eu
          if ! command -v git >/dev/null 2>&1; then
            apt-get update
            apt-get install -y --no-install-recommends git ca-certificates
            rm -rf /var/lib/apt/lists/*
          fi
          pip install --no-cache-dir -r ci/requirements.txt
        '''
      }
    }
    stage('Prepare local quality evidence') {
      steps {
        sh '''
          set -eu
          mkdir -p build
          set +e
          python3 -m testing.quality_gate --profile local --build-dir build
          local_quality_rc=$?
          set -e
          printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
        '''
      }
    }
    stage('Collect SonarQube evidence') {
      steps {
        container('quality-tools') {
          sh '''#!/usr/bin/env bash
            set -euo pipefail
            mkdir -p build
            args=(
              "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
              "-Dsonar.login=${SONARQUBE_TOKEN}"
              "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.sources=."
              "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
              "-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
            )
            [ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
            set +e
            sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
            rc=${PIPESTATUS[0]}
            set -e
            printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
          '''
        }
        sh '''
          set -eu
          mkdir -p build
          python3 - <<'PY'
 import base64
 import json
 import os
 import time
 import urllib.parse
 import urllib.request
 from pathlib import Path
 host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
 project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
 token = os.getenv('SONARQUBE_TOKEN', '').strip()
 report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
 payload = {
    "status": "ERROR",
    "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
 }
 if host and project_key:
    task_file = Path('.scannerwork/report-task.txt')
    task_id = ''
    if task_file.exists():
        for line in task_file.read_text(encoding='utf-8').splitlines():
            key, _, value = line.partition('=')
            if key == 'ceTaskId':
                task_id = value.strip()
                break
    if task_id:
        ce_query = urllib.parse.urlencode({"id": task_id})
        deadline = time.monotonic() + 180
        while time.monotonic() < deadline:
            ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
            if token:
                encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
                ce_request.add_header("Authorization", f"Basic {encoded}")
            try:
                with urllib.request.urlopen(ce_request, timeout=12) as response:
                    ce_payload = json.loads(response.read().decode("utf-8"))
            except Exception:
                time.sleep(3)
                continue
            status = str(ce_payload.get("task", {}).get("status", "")).upper()
            if status in {"SUCCESS", "FAILED", "CANCELED"}:
                break
            time.sleep(3)
    query = urllib.parse.urlencode({"projectKey": project_key})
    request = urllib.request.Request(
        f"{host}/api/qualitygates/project_status?{query}",
        method="GET",
    )
    if token:
        encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
        request.add_header("Authorization", f"Basic {encoded}")
    try:
        with urllib.request.urlopen(request, timeout=12) as response:
            payload = json.loads(response.read().decode("utf-8"))
    except Exception as exc:  # noqa: BLE001
        payload = {"status": "ERROR", "error": str(exc)}
 with open(report_path, "w", encoding="utf-8") as handle:
    json.dump(payload, handle, indent=2, sort_keys=True)
    handle.write("\\n")
 PY
        '''
      }
    }
    stage('Collect IronBank evidence') {
      steps {
        container('quality-tools') {
          sh '''#!/usr/bin/env bash
            set -euo pipefail
            mkdir -p build
            set +e
            trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
            trivy_rc=$?
            set -e
            if [ ! -s build/trivy-fs.json ]; then
              cat > build/ironbank-compliance.json <<EOF
 {"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
 EOF
              exit 0
            fi
          '''
        }
        sh '''
          set -eu
          mkdir -p build
          if [ -s build/trivy-fs.json ]; then
            python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
            exit 0
          fi
          python3 - <<'PY'
 import json
 import os
 from pathlib import Path
 report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
 if report_path.exists():
    raise SystemExit(0)
 status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
 compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
 payload = {
    "status": status or "unknown",
    "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
 }
 payload = {k: v for k, v in payload.items() if v is not None}
 if "status" not in payload:
    payload["status"] = "unknown"
 payload["note"] = (
    "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
    "or write build/ironbank-compliance.json in image-building repos."
 )
 report_path.parent.mkdir(parents=True, exist_ok=True)
 report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
 PY
        '''
      }
    }
    stage('Run quality gate') {
@ -65,8 +255,96 @@ spec:
    stage('Enforce quality gate') {
      steps {
        sh '''
-          set -eu
+          set -euo pipefail
-          test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
+          gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
          fail=0
          if [ "${gate_rc}" -ne 0 ]; then
            echo "quality gate failed with rc=${gate_rc}" >&2
            fail=1
          fi
          enabled() {
            case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
              1|true|yes|on) return 0 ;;
              *) return 1 ;;
            esac
          }
          if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
            sonar_status="$(python3 - <<'PY'
 import json
 from pathlib import Path
 path = Path("build/sonarqube-quality-gate.json")
 if not path.exists():
    print("missing")
    raise SystemExit(0)
 try:
    payload = json.loads(path.read_text(encoding="utf-8"))
 except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
 status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
 print(status or "missing")
 PY
 )"
            case "${sonar_status}" in
              ok|pass|passed|success) ;;
              *)
                echo "sonarqube gate failed: ${sonar_status}" >&2
                fail=1
                ;;
            esac
          fi
          ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
          if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
            ironbank_required=1
          fi
          if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
            supply_status="$(python3 - <<'PY'
 import json
 from pathlib import Path
 path = Path("build/ironbank-compliance.json")
 if not path.exists():
    print("missing")
    raise SystemExit(0)
 try:
    payload = json.loads(path.read_text(encoding="utf-8"))
 except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
 compliant = payload.get("compliant")
 if compliant is True:
    print("ok")
 elif compliant is False:
    print("failed")
 else:
    status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
    print(status or "missing")
 PY
 )"
            case "${supply_status}" in
              ok|pass|passed|success|compliant) ;;
              not_applicable|na|n/a)
                if enabled "${ironbank_required}"; then
                  echo "supply chain gate required but status=${supply_status}" >&2
                  fail=1
                fi
                ;;
              *)
                if enabled "${ironbank_required}"; then
                  echo "supply chain gate failed: ${supply_status}" >&2
                  fail=1
                else
                  echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
                fi
                ;;
            esac
          fi
          exit "${fail}"
        '''
      }
    }
@ -75,7 +353,7 @@ spec:
        script {
          env.FLUX_BRANCH = sh(
            returnStdout: true,
-            script: '''awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml'''
+            script: "awk '/branch:/{print \$2; exit}' clusters/atlas/flux-system/gotk-sync.yaml" // \$ keeps awk's field ref literal; a bare $2 is rejected inside a Groovy double-quoted string
          ).trim()
          if (!env.FLUX_BRANCH) {
            error('Flux branch not found in gotk-sync.yaml')
@ -92,16 +370,28 @@ spec:
        }
      }
      steps {
-        container('jnlp') {
+        withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
-          withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
+          sh '''
-            sh '''
+            set -euo pipefail
-              set +x
+            if ! command -v git >/dev/null 2>&1; then
-              git config user.email "jenkins@bstein.dev"
+              if command -v apk >/dev/null 2>&1; then
-              git config user.name "jenkins"
+                apk add --no-cache git >/dev/null
-              git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
+              elif command -v apt-get >/dev/null 2>&1; then
-              git push origin HEAD:${FLUX_BRANCH}
+                apt-get update >/dev/null
-            '''
+                apt-get install -y git >/dev/null
-          }
+              fi
            fi
            cd "${WORKSPACE:-$PWD}"
            if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
              echo "workspace is not a git checkout; skipping promote"
              exit 0
            fi
            set +x
            git config user.email "jenkins@bstein.dev"
            git config user.name "jenkins"
            git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
            git push origin HEAD:${FLUX_BRANCH}
          '''
        }
      }
    }
--- a/ci/scripts/publish_test_metrics.py
+++ b/ci/scripts/publish_test_metrics.py
@ -6,30 +6,50 @@ from __future__ import annotations
 import json
 import os
 from glob import glob
 from pathlib import Path
 import sys
 import urllib.error
 import urllib.request
 import xml.etree.ElementTree as ET
 # Put the directory two levels above this file's folder at the front of
 # sys.path so the absolute `ci.scripts` import below can resolve
 # (presumably the repository root, given ci/scripts/<this file> -- confirm).
 sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
 from ci.scripts import publish_test_metrics_quality as _quality_helpers
 # Bind selected names from publish_test_metrics_quality as module-level
 # names in this module.
 CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
 _build_check_statuses = _quality_helpers._build_check_statuses
 _combine_statuses = _quality_helpers._combine_statuses
 _infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
 _infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
 _infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
 _infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
 _load_optional_json = _quality_helpers._load_optional_json
 _normalize_result_status = _quality_helpers._normalize_result_status
 def _escape_label(value: str) -> str:
    """Escape a Prometheus label value without changing its content."""
    return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
 def _label_str(labels: dict[str, str]) -> str:
    """Render a stable Prometheus label set from a mapping."""
    parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
    return "{" + ",".join(parts) + "}" if parts else ""
 def _read_text(url: str) -> str:
    """Fetch a plain-text response body from the given URL."""
    with urllib.request.urlopen(url, timeout=10) as response:
        return response.read().decode("utf-8")
 def _post_text(url: str, payload: str) -> None:
    """PUT a plain-text payload and fail on any 4xx/5xx response."""
    request = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
-        method="POST",
+        method="PUT",
        headers={"Content-Type": "text/plain"},
    )
    with urllib.request.urlopen(request, timeout=10) as response:
@ -38,6 +58,7 @@ def _post_text(url: str, payload: str) -> None:
 def _parse_junit(path: str) -> dict[str, int]:
    """Parse a JUnit XML file into aggregate test counters."""
    if not os.path.exists(path):
        return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
@ -64,6 +85,7 @@ def _parse_junit(path: str) -> dict[str, int]:
 def _collect_junit_totals(pattern: str) -> dict[str, int]:
    """Sum JUnit counters across every XML file matching the pattern."""
    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
    for path in sorted(glob(pattern)):
        parsed = _parse_junit(path)
@ -72,7 +94,38 @@ def _collect_junit_totals(pattern: str) -> dict[str, int]:
    return totals
 def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
    """Collect individual JUnit test-case statuses for flaky-test trend panels."""
    cases: list[tuple[str, str]] = []
    for path in sorted(glob(pattern)):
        if not os.path.exists(path):
            continue
        root = ET.parse(path).getroot()
        suites: list[ET.Element]
        if root.tag == "testsuite":
            suites = [root]
        elif root.tag == "testsuites":
            suites = [elem for elem in root if elem.tag == "testsuite"]
        else:
            suites = []
        for suite in suites:
            for test_case in suite.findall("testcase"):
                case_name = test_case.attrib.get("name", "").strip()
                class_name = test_case.attrib.get("classname", "").strip()
                if not case_name:
                    continue
                full_name = f"{class_name}.{case_name}" if class_name else case_name
                status = "passed"
                if test_case.find("failure") is not None or test_case.find("error") is not None:
                    status = "failed"
                elif test_case.find("skipped") is not None:
                    status = "skipped"
                cases.append((full_name, status))
    return cases
 def _read_exit_code(path: str) -> int:
    """Read the quality-gate exit code, defaulting to failure if missing."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return int(handle.read().strip())
@ -81,6 +134,7 @@ def _read_exit_code(path: str) -> int:
 def _load_summary(path: str) -> dict:
    """Load the JSON quality-gate summary, returning an empty mapping on error."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
@ -89,6 +143,7 @@ def _load_summary(path: str) -> dict:
 def _summary_float(summary: dict, key: str) -> float:
    """Extract a float-like value from the summary, defaulting to 0.0."""
    value = summary.get(key)
    if isinstance(value, (int, float)):
        return float(value)
@ -96,6 +151,7 @@ def _summary_float(summary: dict, key: str) -> float:
 def _summary_int(summary: dict, key: str) -> int:
    """Extract an int-like value from the summary, defaulting to 0."""
    value = summary.get(key)
    if isinstance(value, int):
        return value
@ -105,6 +161,7 @@ def _summary_int(summary: dict, key: str) -> int:
 def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
    """Return the current counter value for a labeled metric if present."""
    text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
    for line in text.splitlines():
        if not line.startswith(metric + "{"):
@ -125,22 +182,33 @@ def _build_payload(
    suite: str,
    status: str,
    tests: dict[str, int],
    test_cases: list[tuple[str, str]],
    ok_count: int,
    failed_count: int,
    branch: str,
    build_number: str,
    jenkins_job: str,
    summary: dict | None = None,
    workspace_line_coverage_percent: float = 0.0,
    source_lines_over_500: int = 0,
    check_statuses: dict[str, str] | None = None,
 ) -> str:
    """Build the Pushgateway payload for the current suite run."""
    passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
    build_labels = _label_str(
        {
            "suite": suite,
            "branch": branch or "unknown",
            "build_number": build_number or "unknown",
            "jenkins_job": jenkins_job or suite,
        }
    )
    test_case_base_labels = {
        "suite": suite,
        "branch": branch or "unknown",
        "build_number": build_number or "unknown",
        "jenkins_job": jenkins_job or suite,
    }
    lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
@ -153,6 +221,8 @@ def _build_payload(
        "# TYPE titan_iac_quality_gate_run_status gauge",
        f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
        f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
        "# TYPE platform_quality_gate_build_info gauge",
        f"platform_quality_gate_build_info{build_labels} 1",
        "# TYPE titan_iac_quality_gate_build_info gauge",
        f"titan_iac_quality_gate_build_info{build_labels} 1",
        "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
@ -160,36 +230,69 @@ def _build_payload(
        "# TYPE platform_quality_gate_source_lines_over_500_total gauge",
        f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
    ]
-    results = summary.get("results", []) if isinstance(summary, dict) else []
+    if check_statuses:
    if results:
        lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
-        for result in results:
+        for check_name in CANONICAL_CHECKS:
-            check_name = result.get("name")
+            check_status = check_statuses.get(check_name, "not_applicable")
            check_status = result.get("status")
            if not check_name or not check_status:
                continue
            lines.append(
-                f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(str(check_name))}",result="{_escape_label(str(check_status))}"}} 1'
+                f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
            )
    lines.append("# TYPE platform_quality_gate_test_case_result gauge")
    if test_cases:
        for test_name, test_status in test_cases:
            labels = {
                **test_case_base_labels,
                "test": test_name,
                "status": test_status,
            }
            lines.append(
                f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
            )
    else:
        labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
        lines.append(
            f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
        )
    return "\n".join(lines) + "\n"
 def main() -> int:
-    suite = os.getenv("SUITE_NAME", "titan-iac")
+    """Publish the quality-gate metrics and print a compact run summary."""
    suite = os.getenv("SUITE_NAME", "titan_iac")
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
    job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
    junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
    exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
    summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
-    branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", ""))
+    branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
    if branch.startswith("origin/"):
        branch = branch[len("origin/") :]
    build_number = os.getenv("BUILD_NUMBER", "")
    jenkins_job = os.getenv("JOB_NAME", "titan-iac")
    tests = _collect_junit_totals(junit_glob)
    test_cases = _collect_junit_cases(junit_glob)
    exit_code = _read_exit_code(exit_code_path)
    status = "ok" if exit_code == 0 else "failed"
    summary = _load_summary(summary_path)
    workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
    if workspace_line_coverage_percent <= 0:
        workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
    source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
    if source_lines_over_500 <= 0:
        source_lines_over_500 = _infer_source_lines_over_500(summary)
    sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
    supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
    supply_chain_required = os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in {"1", "true", "yes", "on"}
    check_statuses = _build_check_statuses(
        summary=summary,
        tests=tests,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        sonarqube_report=sonarqube_report,
        supply_chain_report=supply_chain_report,
        supply_chain_required=supply_chain_required,
    )
    ok_count = int(
        _fetch_existing_counter(
@ -214,13 +317,16 @@ def main() -> int:
        suite=suite,
        status=status,
        tests=tests,
        test_cases=test_cases,
        ok_count=ok_count,
        failed_count=failed_count,
        branch=branch,
        build_number=build_number,
        jenkins_job=jenkins_job,
        summary=summary,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        check_statuses=check_statuses,
    )
    push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
    _post_text(push_url, payload)
@ -234,7 +340,7 @@ def main() -> int:
        "tests_skipped": tests["skipped"],
        "ok_count": ok_count,
        "failed_count": failed_count,
-        "checks_recorded": len(summary.get("results", [])) if isinstance(summary, dict) else 0,
+        "checks_recorded": len(check_statuses),
        "workspace_line_coverage_percent": workspace_line_coverage_percent,
        "source_lines_over_500": source_lines_over_500,
    }
@ -242,5 +348,5 @@ def main() -> int:
    return 0
-if __name__ == "__main__":
+if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
--- a/ci/scripts/publish_test_metrics_quality.py
+++ b/ci/scripts/publish_test_metrics_quality.py
@ -0,0 +1,200 @@
 #!/usr/bin/env python3
 """Quality/status helpers for publish_test_metrics."""
 from __future__ import annotations
 import json
 from pathlib import Path
 import xml.etree.ElementTree as ET
 # Lower-cased raw status strings that _normalize_result_status maps to "ok".
 SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
 # Lower-cased raw status strings that map to "not_applicable".
 NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
 # Lower-cased raw status strings that map to "failed"; warnings count as failures.
 FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
 # Canonical check names; these match the keys (and their order) of the map
 # returned by _build_check_statuses.
 CANONICAL_CHECKS = [
    "tests",
    "coverage",
    "loc",
    "docs_naming",
    "gate_glue",
    "sonarqube",
    "supply_chain",
 ]
 def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
    """Infer workspace line coverage from quality summary coverage XML metadata."""
    results = summary.get("results", []) if isinstance(summary, dict) else []
    coverage_xml = default_xml
    for result in results:
        if not isinstance(result, dict):
            continue
        if str(result.get("name") or "").strip().lower() != "coverage":
            continue
        candidate = str(result.get("coverage_xml") or "").strip()
        if candidate:
            coverage_xml = candidate
            break
    xml_path = Path(coverage_xml)
    if not xml_path.exists():
        return 0.0
    try:
        root = ET.parse(xml_path).getroot()
        line_rate = root.attrib.get("line-rate")
        if line_rate is None:
            return 0.0
        return float(line_rate) * 100.0
    except (ET.ParseError, OSError, ValueError):
        return 0.0
 def _infer_source_lines_over_500(summary: dict) -> int:
    """Infer over-limit source file count from hygiene issue payloads."""
    results = summary.get("results", []) if isinstance(summary, dict) else []
    for result in results:
        if not isinstance(result, dict):
            continue
        if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
            continue
        issues = result.get("issues")
        if not isinstance(issues, list):
            continue
        return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
    return 0
 def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Map arbitrary check status text into canonical check result buckets."""
    if not value:
        return default
    normalized = value.strip().lower()
    if normalized in SUCCESS_STATUSES:
        return "ok"
    if normalized in NOT_APPLICABLE_STATUSES:
        return "not_applicable"
    if normalized in FAILED_STATUSES:
        return "failed"
    return default
 def _load_optional_json(path: str | None) -> dict:
    """Load an optional JSON report file, returning an empty object when absent."""
    if not path:
        return {}
    candidate = Path(path)
    if not candidate.exists():
        return {}
    try:
        return json.loads(candidate.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return {}
 def _combine_statuses(statuses: list[str]) -> str:
    """Roll up many check statuses into one canonical result."""
    if not statuses:
        return "not_applicable"
    if any(status == "failed" for status in statuses):
        return "failed"
    if all(status == "not_applicable" for status in statuses):
        return "not_applicable"
    if all(status in {"ok", "not_applicable"} for status in statuses):
        return "ok"
    return "failed"
 def _infer_sonarqube_status(report: dict) -> str:
    """Infer canonical SonarQube check status from its JSON report payload."""
    if not report:
        return "not_applicable"
    status = (
        report.get("projectStatus", {}).get("status")
        or report.get("qualityGate", {}).get("status")
        or report.get("status")
    )
    return _normalize_result_status(str(status) if status is not None else None, default="failed")
 def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Infer canonical supply-chain status from IronBank/artifact report payload."""
    if not report:
        return "failed" if required else "not_applicable"
    compliant = report.get("compliant")
    if isinstance(compliant, bool):
        return "ok" if compliant else "failed"
    status = report.get("status")
    if status is None:
        return "failed" if required else "not_applicable"
    normalized = _normalize_result_status(str(status), default="failed")
    if normalized == "not_applicable" and required:
        return "failed"
    return normalized
 def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
 ) -> dict[str, str]:
    """Generate the canonical quality-check status map for dashboarding."""
    raw_results = summary.get("results", []) if isinstance(summary, dict) else []
    status_by_name: dict[str, str] = {}
    for result in raw_results:
        if not isinstance(result, dict):
            continue
        check_name = str(result.get("name") or "").strip().lower()
        if not check_name:
            continue
        status_by_name[check_name] = _normalize_result_status(result.get("status"), default="failed")
    tests_status = status_by_name.get("tests")
    if not tests_status:
        candidate_keys = ["unit", "integration", "e2e", "pytest", "test", "tests"]
        candidates = [status_by_name[key] for key in candidate_keys if key in status_by_name]
        if candidates:
            tests_status = _combine_statuses(candidates)
        elif tests["tests"] > 0:
            tests_status = "ok" if (tests["failures"] + tests["errors"]) == 0 else "failed"
        else:
            tests_status = "not_applicable"
    coverage_status = status_by_name.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"
    loc_status = status_by_name.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"
    docs_naming_status = status_by_name.get("docs_naming")
    if not docs_naming_status:
        candidates = [status_by_name[key] for key in ["docs", "hygiene", "smell", "lint", "naming"] if key in status_by_name]
        docs_naming_status = _combine_statuses(candidates) if candidates else "not_applicable"
    gate_glue_status = status_by_name.get("gate_glue")
    if not gate_glue_status:
        candidates = [status_by_name[key] for key in ["gate_glue", "glue", "gate"] if key in status_by_name]
        gate_glue_status = _combine_statuses(candidates) if candidates else "not_applicable"
    sonarqube_status = status_by_name.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = status_by_name.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )
    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }
--- a/ci/scripts/supply_chain_report.py
+++ b/ci/scripts/supply_chain_report.py
@ -0,0 +1,173 @@
 """Build a titan-iac supply-chain compliance report from Trivy evidence."""
 from __future__ import annotations
 import argparse
 import datetime as dt
 import json
 from pathlib import Path
 from typing import Any
 # Upper-cased Trivy severities at which a failed misconfiguration is reported
 # by _iter_failed_misconfigurations; lower severities are ignored there.
 FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
 def _read_json(path: Path) -> dict[str, Any]:
    """Read a JSON object from disk for use as pipeline evidence."""
    payload = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise ValueError(f"{path} must contain a JSON object")
    return payload
 def _parse_day(raw: str | None) -> dt.date | None:
    """Parse an ISO day while letting optional waiver dates stay optional."""
    if not raw:
        return None
    return dt.date.fromisoformat(raw)
 def _today(override: str | None = None) -> dt.date:
    """Return the policy day so tests can pin expiry behavior."""
    return _parse_day(override) or dt.date.today()
 def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
    """Return active ``(misconfiguration id, target)`` waivers and expired count."""
    if path is None or not path.exists():
        return set(), 0
    payload = _read_json(path)
    default_expires_at = payload.get("default_expires_at")
    active: set[tuple[str, str]] = set()
    expired = 0
    for entry in payload.get("misconfigurations", []):
        if not isinstance(entry, dict):
            continue
        misconfiguration_id = str(entry.get("id") or "").strip()
        if not misconfiguration_id:
            continue
        expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
        targets = entry.get("targets", [])
        if not isinstance(targets, list):
            continue
        if expires_at and expires_at < policy_day:
            expired += len(targets)
            continue
        # Waivers are target-specific so a new unsafe manifest fails until it is
        # either fixed or deliberately accepted with a fresh expiration.
        for target in targets:
            if isinstance(target, str) and target:
                active.add((misconfiguration_id, target))
    return active, expired
 def _iter_failed_misconfigurations(payload: dict[str, Any]):
    """Yield failed high/critical Trivy misconfiguration records."""
    for result in payload.get("Results", []):
        if not isinstance(result, dict):
            continue
        target = str(result.get("Target") or "")
        for item in result.get("Misconfigurations") or []:
            if not isinstance(item, dict):
                continue
            if item.get("Status") != "FAIL":
                continue
            if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
                continue
            yield target, item
 def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
    """Count Trivy vulnerabilities at a specific severity."""
    count = 0
    for result in payload.get("Results", []):
        if not isinstance(result, dict):
            continue
        for item in result.get("Vulnerabilities") or []:
            if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
                count += 1
    return count
 def _count_secrets(payload: dict[str, Any]) -> int:
    """Count detected secrets in the Trivy filesystem report."""
    count = 0
    for result in payload.get("Results", []):
        if isinstance(result, dict):
            count += len(result.get("Secrets") or [])
    return count
 def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
 ) -> dict[str, Any]:
    """Summarise Trivy evidence into the compliance report read by the quality gate.

    Args:
        trivy_payload: Decoded Trivy JSON report.
        waiver_path: Optional waiver file; matching ``(id, target)`` pairs are
            counted as waived instead of open.
        today_override: Optional ISO day used in place of today's date when
            deciding whether waivers have expired.

    Returns:
        A JSON-serialisable dict. ``status`` is ``"ok"`` only when there are no
        critical vulnerabilities, no secrets and no un-waived high/critical
        misconfigurations; high vulnerabilities are counted but do not fail the
        report. At most 20 open misconfigurations are listed as examples.
    """
    policy_day = _today(today_override)
    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, policy_day)

    unwaived: list[dict[str, str]] = []
    waived_total = 0
    for target, finding in _iter_failed_misconfigurations(trivy_payload):
        rule_id = str(finding.get("ID") or "")
        if (rule_id, target) not in active_waivers:
            unwaived.append(
                {
                    "id": rule_id,
                    "target": target,
                    "severity": str(finding.get("Severity") or ""),
                    "title": str(finding.get("Title") or ""),
                }
            )
        else:
            waived_total += 1

    critical_count = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high_count = _count_vulnerabilities(trivy_payload, "HIGH")
    secret_count = _count_secrets(trivy_payload)
    compliant = critical_count == 0 and secret_count == 0 and not unwaived

    report: dict[str, Any] = {
        "status": "ok" if compliant else "failed",
        "compliant": compliant,
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical_count,
        "high_vulnerabilities": high_count,
        "high_vulnerability_policy": "observe",
        "secrets": secret_count,
        "high_or_critical_misconfigurations": len(unwaived),
        "waived_misconfigurations": waived_total,
        "expired_waivers": expired_waivers,
        "waiver_file": str(waiver_path) if waiver_path else "",
        "open_misconfiguration_examples": unwaived[:20],
    }
    return report
 def main(argv: list[str] | None = None) -> int:
    """Command-line entrypoint: read Trivy JSON, write the compliance report.

    Options: ``--trivy-json`` and ``--output`` are required; ``--waivers`` and
    ``--today`` are optional. The output's parent directory is created when
    missing, and the report is written as sorted, indented JSON.

    Returns:
        0 after the report has been written.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, mandatory in (
        ("--trivy-json", True),
        ("--waivers", False),
        ("--output", True),
        ("--today", False),
    ):
        cli.add_argument(flag, required=mandatory)
    options = cli.parse_args(argv)

    evidence = _read_json(Path(options.trivy_json))
    waivers = Path(options.waivers) if options.waivers else None
    report = build_report(evidence, waiver_path=waivers, today_override=options.today)

    destination = Path(options.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(report, indent=2, sort_keys=True) + "\n"
    destination.write_text(rendered, encoding="utf-8")
    return 0
 if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
--- a/ci/tests/glue/test_ariadne_schedules.py
+++ b/ci/tests/glue/test_ariadne_schedules.py
@ -0,0 +1,108 @@
 """Glue checks for Ariadne schedules exported to VictoriaMetrics."""
 from __future__ import annotations
 import os
 from datetime import datetime, timezone
 from pathlib import Path
 import requests
 import yaml
 # config.yaml is resolved next to this test module, independent of the working directory.
 CONFIG_PATH = Path(__file__).with_name("config.yaml")
 def _load_config() -> dict:
    """Parse the glue test config at CONFIG_PATH; an empty document yields ``{}``."""
    with CONFIG_PATH.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed if parsed else {}
 def _query(promql: str) -> list[dict]:
    """Run an instant query against the ``/api/v1/query`` endpoint at ``VM_URL``.

    Returns the ``data.result`` list of the JSON response (``[]`` when absent).
    HTTP error statuses raise via ``raise_for_status``; the request times out
    after 10 seconds.
    """
    base_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    reply = requests.get(f"{base_url}/api/v1/query", params={"query": promql}, timeout=10)
    reply.raise_for_status()
    body = reply.json()
    data = body.get("data", {})
    return data.get("result", [])
 def _expected_tasks() -> list[dict]:
    cfg = _load_config()
    tasks = [
        _normalize_task(item, cfg)
        for item in cfg.get("ariadne_schedule_tasks", [])
    ]
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
 def _normalize_task(item: object, cfg: dict) -> dict:
    if isinstance(item, str):
        return {
            "task": item,
            "check_last_success": True,
            "max_success_age_hours": cfg.get("max_success_age_hours", 48),
        }
    if isinstance(item, dict):
        normalized = dict(item)
        normalized.setdefault("check_last_success", True)
        normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
        return normalized
    raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
 def _tracked_tasks(tasks: list[dict]) -> list[dict]:
    tracked = [item for item in tasks if item.get("check_last_success")]
    assert tracked, "No Ariadne schedule tasks are marked for success tracking"
    return tracked
 def _task_regex(tasks: list[dict]) -> str:
    return "|".join(item["task"] for item in tasks)
 def test_ariadne_schedule_series_exist():
    tasks = _expected_tasks()
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing next-run metrics for: {', '.join(missing)}"
 def test_ariadne_schedule_recent_success():
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"
    now = datetime.now(timezone.utc)
    age_by_task = {
        item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600
        for item in series
    }
    too_old = [
        f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)"
        for item in tasks
        if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"])
    ]
    assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)
 def test_ariadne_schedule_last_status_present_and_boolean():
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-status metrics for: {', '.join(missing)}"
    invalid = []
    for item in series:
        task = item.get("metric", {}).get("task")
        value = float(item["value"][1])
        if value not in (0.0, 1.0):
            invalid.append(f"{task}={value}")
    assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"
--- a/ci/tests/glue/test_glue_metrics.py
+++ b/ci/tests/glue/test_glue_metrics.py
@ -1,3 +1,5 @@
 """Glue checks for the metrics the quality-gate publishes."""
 from __future__ import annotations
 import os
@ -23,26 +25,63 @@ def _query(promql: str) -> list[dict]:
    return payload.get("data", {}).get("result", [])
-def test_glue_metrics_present():
+def _expected_tasks() -> list[dict]:
-    series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
+    cfg = _load_config()
-    assert series, "No glue cronjob label series found"
+    tasks = [
        _normalize_task(item, cfg)
        for item in cfg.get("ariadne_schedule_tasks", [])
    ]
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
-def test_glue_metrics_success_join():
+def _normalize_task(item: object, cfg: dict) -> dict:
-    query = (
+    if isinstance(item, str):
-        "kube_cronjob_status_last_successful_time "
+        return {
-        'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
+            "task": item,
-    )
+            "check_last_success": True,
-    series = _query(query)
+            "max_success_age_hours": cfg.get("max_success_age_hours", 48),
-    assert series, "No glue cronjob last success series found"
+        }
    if isinstance(item, dict):
        normalized = dict(item)
        normalized.setdefault("check_last_success", True)
        normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
        return normalized
    raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
 def _tracked_tasks(tasks: list[dict]) -> list[dict]:
    tracked = [item for item in tasks if item.get("check_last_success")]
    assert tracked, "No Ariadne schedule tasks are marked for success tracking"
    return tracked
 def _task_regex(tasks: list[dict]) -> str:
    return "|".join(item["task"] for item in tasks)
 def test_ariadne_schedule_metrics_present():
-    cfg = _load_config()
+    tasks = _expected_tasks()
-    expected = cfg.get("ariadne_schedule_tasks", [])
+    selector = _task_regex(tasks)
-    if not expected:
+    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
-        return
+    seen = {item.get("metric", {}).get("task") for item in series}
-    series = _query("ariadne_schedule_next_run_timestamp_seconds")
+    missing = [item["task"] for item in tasks if item["task"] not in seen]
    tasks = {item.get("metric", {}).get("task") for item in series}
    missing = [task for task in expected if task not in tasks]
    assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
 def test_ariadne_schedule_success_and_status_metrics_present():
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
    success_tasks = {item.get("metric", {}).get("task") for item in success}
    status_tasks = {item.get("metric", {}).get("task") for item in status}
    expected = {item["task"] for item in tasks}
    missing_success = sorted(expected - success_tasks)
    missing_status = sorted(expected - status_tasks)
    assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
    assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"
--- a/ci/titan-iac-trivy-waivers.json
+++ b/ci/titan-iac-trivy-waivers.json
@ -0,0 +1,401 @@
 {
  "version": 1,
  "generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
  "default_expires_at": "2026-05-22",
  "ticket": "atlas-quality-wave-k8s-hardening",
  "default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
  "misconfigurations": [
    {
      "id": "DS-0002",
      "targets": [
        "dockerfiles/Dockerfile.ananke-node-helper"
      ]
    },
    {
      "id": "KSV-0009",
      "targets": [
        "services/mailu/vip-controller.yaml",
        "services/maintenance/k3s-agent-restart-daemonset.yaml"
      ]
    },
    {
      "id": "KSV-0010",
      "targets": [
        "services/maintenance/k3s-agent-restart-daemonset.yaml",
        "services/maintenance/metis-sentinel-amd64-daemonset.yaml",
        "services/maintenance/metis-sentinel-arm64-daemonset.yaml",
        "services/monitoring/jetson-tegrastats-exporter.yaml"
      ]
    },
    {
      "id": "KSV-0014",
      "targets": [
        "infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
        "infrastructure/core/ntp-sync-daemonset.yaml",
        "infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
        "infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
        "infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
        "infrastructure/longhorn/core/vault-sync-deployment.yaml",
        "infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
        "infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
        "infrastructure/postgres/statefulset.yaml",
        "infrastructure/vault-csi/vault-csi-provider.yaml",
        "services/ai-llm/deployment.yaml",
        "services/bstein-dev-home/backend-deployment.yaml",
        "services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
        "services/bstein-dev-home/frontend-deployment.yaml",
        "services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
        "services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
        "services/bstein-dev-home/vault-sync-deployment.yaml",
        "services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
        "services/comms/atlasbot-deployment.yaml",
        "services/comms/coturn.yaml",
        "services/comms/element-call-deployment.yaml",
        "services/comms/guest-name-job.yaml",
        "services/comms/guest-register-deployment.yaml",
        "services/comms/livekit-token-deployment.yaml",
        "services/comms/livekit.yaml",
        "services/comms/mas-deployment.yaml",
        "services/comms/oneoffs/bstein-force-leave-job.yaml",
        "services/comms/oneoffs/comms-secrets-ensure-job.yaml",
        "services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
        "services/comms/oneoffs/mas-db-ensure-job.yaml",
        "services/comms/oneoffs/mas-local-users-ensure-job.yaml",
        "services/comms/oneoffs/othrys-kick-numeric-job.yaml",
        "services/comms/oneoffs/synapse-admin-ensure-job.yaml",
        "services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
        "services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
        "services/comms/oneoffs/synapse-user-seed-job.yaml",
        "services/comms/pin-othrys-job.yaml",
        "services/comms/reset-othrys-room-job.yaml",
        "services/comms/seed-othrys-room.yaml",
        "services/comms/vault-sync-deployment.yaml",
        "services/comms/wellknown.yaml",
        "services/crypto/monerod/deployment.yaml",
        "services/crypto/wallet-monero-temp/deployment.yaml",
        "services/crypto/xmr-miner/deployment.yaml",
        "services/crypto/xmr-miner/vault-sync-deployment.yaml",
        "services/crypto/xmr-miner/xmrig-daemonset.yaml",
        "services/finance/actual-budget-deployment.yaml",
        "services/finance/firefly-cronjob.yaml",
        "services/finance/firefly-deployment.yaml",
        "services/finance/firefly-user-sync-cronjob.yaml",
        "services/finance/oneoffs/finance-secrets-ensure-job.yaml",
        "services/gitea/deployment.yaml",
        "services/harbor/vault-sync-deployment.yaml",
        "services/health/wger-admin-ensure-cronjob.yaml",
        "services/health/wger-deployment.yaml",
        "services/health/wger-user-sync-cronjob.yaml",
        "services/jellyfin/deployment.yaml",
        "services/jellyfin/loader.yaml",
        "services/jenkins/deployment.yaml",
        "services/jenkins/vault-sync-deployment.yaml",
        "services/keycloak/deployment.yaml",
        "services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/ldap-federation-job.yaml",
        "services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
        "services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-client-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
        "services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/realm-settings-job.yaml",
        "services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/user-overrides-job.yaml",
        "services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
        "services/keycloak/vault-sync-deployment.yaml",
        "services/logging/node-image-gc-rpi4-daemonset.yaml",
        "services/logging/node-image-prune-rpi5-daemonset.yaml",
        "services/logging/node-log-rotation-daemonset.yaml",
        "services/logging/oauth2-proxy.yaml",
        "services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
        "services/logging/oneoffs/opensearch-ism-job.yaml",
        "services/logging/oneoffs/opensearch-observability-setup-job.yaml",
        "services/logging/opensearch-prune-cronjob.yaml",
        "services/logging/vault-sync-deployment.yaml",
        "services/mailu/mailu-sync-cronjob.yaml",
        "services/mailu/mailu-sync-listener.yaml",
        "services/mailu/oneoffs/mailu-sync-job.yaml",
        "services/mailu/vault-sync-deployment.yaml",
        "services/mailu/vip-controller.yaml",
        "services/maintenance/ariadne-deployment.yaml",
        "services/maintenance/disable-k3s-traefik-daemonset.yaml",
        "services/maintenance/image-sweeper-cronjob.yaml",
        "services/maintenance/k3s-agent-restart-daemonset.yaml",
        "services/maintenance/metis-deployment.yaml",
        "services/maintenance/metis-k3s-token-sync-cronjob.yaml",
        "services/maintenance/metis-sentinel-amd64-daemonset.yaml",
        "services/maintenance/metis-sentinel-arm64-daemonset.yaml",
        "services/maintenance/node-image-sweeper-daemonset.yaml",
        "services/maintenance/node-nofile-daemonset.yaml",
        "services/maintenance/oauth2-proxy-metis.yaml",
        "services/maintenance/oauth2-proxy-soteria.yaml",
        "services/maintenance/oneoffs/ariadne-migrate-job.yaml",
        "services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
        "services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
        "services/maintenance/pod-cleaner-cronjob.yaml",
        "services/maintenance/soteria-deployment.yaml",
        "services/maintenance/vault-sync-deployment.yaml",
        "services/monitoring/dcgm-exporter.yaml",
        "services/monitoring/jetson-tegrastats-exporter.yaml",
        "services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
        "services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
        "services/monitoring/platform-quality-gateway-deployment.yaml",
        "services/monitoring/platform-quality-suite-probe-cronjob.yaml",
        "services/monitoring/postmark-exporter-deployment.yaml",
        "services/monitoring/vault-sync-deployment.yaml",
        "services/nextcloud-mail-sync/cronjob.yaml",
        "services/nextcloud/collabora.yaml",
        "services/nextcloud/cronjob.yaml",
        "services/nextcloud/deployment.yaml",
        "services/nextcloud/maintenance-cronjob.yaml",
        "services/oauth2-proxy/deployment.yaml",
        "services/openldap/statefulset.yaml",
        "services/outline/deployment.yaml",
        "services/outline/redis-deployment.yaml",
        "services/pegasus/deployment.yaml",
        "services/pegasus/vault-sync-deployment.yaml",
        "services/planka/deployment.yaml",
        "services/quality/oauth2-proxy-sonarqube.yaml",
        "services/quality/sonarqube-deployment.yaml",
        "services/quality/sonarqube-exporter-deployment.yaml",
        "services/sui-metrics/base/deployment.yaml",
        "services/typhon/vault-sync-deployment.yaml",
        "services/vault/k8s-auth-config-cronjob.yaml",
        "services/vault/oidc-config-cronjob.yaml",
        "services/vault/statefulset.yaml",
        "services/vaultwarden/deployment.yaml"
      ]
    },
    {
      "id": "KSV-0017",
      "targets": [
        "infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
        "services/logging/node-image-gc-rpi4-daemonset.yaml",
        "services/logging/node-image-prune-rpi5-daemonset.yaml",
        "services/logging/node-log-rotation-daemonset.yaml",
        "services/maintenance/disable-k3s-traefik-daemonset.yaml",
        "services/maintenance/image-sweeper-cronjob.yaml",
        "services/maintenance/k3s-agent-restart-daemonset.yaml",
        "services/maintenance/metis-deployment.yaml",
        "services/maintenance/metis-sentinel-amd64-daemonset.yaml",
        "services/maintenance/metis-sentinel-arm64-daemonset.yaml",
        "services/maintenance/node-image-sweeper-daemonset.yaml",
        "services/maintenance/node-nofile-daemonset.yaml",
        "services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
        "services/monitoring/dcgm-exporter.yaml",
        "services/monitoring/jetson-tegrastats-exporter.yaml"
      ]
    },
    {
      "id": "KSV-0041",
      "targets": [
        "infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
        "infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
        "infrastructure/traefik/clusterrole.yaml",
        "services/bstein-dev-home/rbac.yaml",
        "services/comms/comms-secrets-ensure-rbac.yaml",
        "services/comms/mas-db-ensure-rbac.yaml",
        "services/comms/mas-secrets-ensure-rbac.yaml",
        "services/maintenance/soteria-rbac.yaml"
      ]
    },
    {
      "id": "KSV-0047",
      "targets": [
        "services/monitoring/rbac.yaml"
      ]
    },
    {
      "id": "KSV-0053",
      "targets": [
        "services/comms/comms-secrets-ensure-rbac.yaml",
        "services/comms/mas-db-ensure-rbac.yaml",
        "services/jenkins/serviceaccount.yaml",
        "services/maintenance/ariadne-rbac.yaml"
      ]
    },
    {
      "id": "KSV-0056",
      "targets": [
        "infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
        "infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
        "services/jenkins/serviceaccount.yaml",
        "services/maintenance/disable-k3s-traefik-rbac.yaml",
        "services/maintenance/k3s-traefik-cleanup-rbac.yaml"
      ]
    },
    {
      "id": "KSV-0114",
      "targets": [
        "infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
      ]
    },
    {
      "id": "KSV-0118",
      "targets": [
        "infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
        "infrastructure/core/coredns-deployment.yaml",
        "infrastructure/core/ntp-sync-daemonset.yaml",
        "infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
        "infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
        "infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
        "infrastructure/longhorn/core/vault-sync-deployment.yaml",
        "infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
        "infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
        "infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
        "infrastructure/postgres/statefulset.yaml",
        "infrastructure/vault-csi/vault-csi-provider.yaml",
        "services/ai-llm/deployment.yaml",
        "services/bstein-dev-home/backend-deployment.yaml",
        "services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
        "services/bstein-dev-home/frontend-deployment.yaml",
        "services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
        "services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
        "services/bstein-dev-home/vault-sync-deployment.yaml",
        "services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
        "services/comms/atlasbot-deployment.yaml",
        "services/comms/coturn.yaml",
        "services/comms/element-call-deployment.yaml",
        "services/comms/guest-name-job.yaml",
        "services/comms/livekit-token-deployment.yaml",
        "services/comms/livekit.yaml",
        "services/comms/mas-deployment.yaml",
        "services/comms/oneoffs/bstein-force-leave-job.yaml",
        "services/comms/oneoffs/comms-secrets-ensure-job.yaml",
        "services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
        "services/comms/oneoffs/mas-db-ensure-job.yaml",
        "services/comms/oneoffs/mas-local-users-ensure-job.yaml",
        "services/comms/oneoffs/othrys-kick-numeric-job.yaml",
        "services/comms/oneoffs/synapse-admin-ensure-job.yaml",
        "services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
        "services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
        "services/comms/oneoffs/synapse-user-seed-job.yaml",
        "services/comms/pin-othrys-job.yaml",
        "services/comms/reset-othrys-room-job.yaml",
        "services/comms/seed-othrys-room.yaml",
        "services/comms/vault-sync-deployment.yaml",
        "services/comms/wellknown.yaml",
        "services/crypto/monerod/deployment.yaml",
        "services/crypto/wallet-monero-temp/deployment.yaml",
        "services/crypto/xmr-miner/deployment.yaml",
        "services/crypto/xmr-miner/vault-sync-deployment.yaml",
        "services/crypto/xmr-miner/xmrig-daemonset.yaml",
        "services/finance/firefly-cronjob.yaml",
        "services/finance/firefly-deployment.yaml",
        "services/finance/firefly-user-sync-cronjob.yaml",
        "services/finance/oneoffs/finance-secrets-ensure-job.yaml",
        "services/gitea/deployment.yaml",
        "services/harbor/vault-sync-deployment.yaml",
        "services/health/wger-admin-ensure-cronjob.yaml",
        "services/health/wger-deployment.yaml",
        "services/health/wger-user-sync-cronjob.yaml",
        "services/jellyfin/loader.yaml",
        "services/jenkins/deployment.yaml",
        "services/jenkins/vault-sync-deployment.yaml",
        "services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/ldap-federation-job.yaml",
        "services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
        "services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-client-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
        "services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
        "services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/realm-settings-job.yaml",
        "services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
        "services/keycloak/oneoffs/user-overrides-job.yaml",
        "services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
        "services/keycloak/vault-sync-deployment.yaml",
        "services/logging/node-image-gc-rpi4-daemonset.yaml",
        "services/logging/node-image-prune-rpi5-daemonset.yaml",
        "services/logging/node-log-rotation-daemonset.yaml",
        "services/logging/oauth2-proxy.yaml",
        "services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
        "services/logging/oneoffs/opensearch-ism-job.yaml",
        "services/logging/oneoffs/opensearch-observability-setup-job.yaml",
        "services/logging/opensearch-prune-cronjob.yaml",
        "services/logging/vault-sync-deployment.yaml",
        "services/mailu/mailu-sync-cronjob.yaml",
        "services/mailu/mailu-sync-listener.yaml",
        "services/mailu/oneoffs/mailu-sync-job.yaml",
        "services/mailu/vault-sync-deployment.yaml",
        "services/mailu/vip-controller.yaml",
        "services/maintenance/ariadne-deployment.yaml",
        "services/maintenance/disable-k3s-traefik-daemonset.yaml",
        "services/maintenance/image-sweeper-cronjob.yaml",
        "services/maintenance/k3s-agent-restart-daemonset.yaml",
        "services/maintenance/metis-deployment.yaml",
        "services/maintenance/metis-k3s-token-sync-cronjob.yaml",
        "services/maintenance/metis-sentinel-amd64-daemonset.yaml",
        "services/maintenance/metis-sentinel-arm64-daemonset.yaml",
        "services/maintenance/node-image-sweeper-daemonset.yaml",
        "services/maintenance/node-nofile-daemonset.yaml",
        "services/maintenance/oauth2-proxy-metis.yaml",
        "services/maintenance/oauth2-proxy-soteria.yaml",
        "services/maintenance/oneoffs/ariadne-migrate-job.yaml",
        "services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
        "services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
        "services/maintenance/pod-cleaner-cronjob.yaml",
        "services/maintenance/soteria-deployment.yaml",
        "services/maintenance/vault-sync-deployment.yaml",
        "services/monitoring/dcgm-exporter.yaml",
        "services/monitoring/jetson-tegrastats-exporter.yaml",
        "services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
        "services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
        "services/monitoring/platform-quality-gateway-deployment.yaml",
        "services/monitoring/platform-quality-suite-probe-cronjob.yaml",
        "services/monitoring/postmark-exporter-deployment.yaml",
        "services/monitoring/vault-sync-deployment.yaml",
        "services/nextcloud/collabora.yaml",
        "services/oauth2-proxy/deployment.yaml",
        "services/openldap/statefulset.yaml",
        "services/outline/deployment.yaml",
        "services/outline/redis-deployment.yaml",
        "services/pegasus/vault-sync-deployment.yaml",
        "services/quality/oauth2-proxy-sonarqube.yaml",
        "services/quality/sonarqube-deployment.yaml",
        "services/quality/sonarqube-exporter-deployment.yaml",
        "services/sui-metrics/base/deployment.yaml",
        "services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
        "services/typhon/deployment.yaml",
        "services/typhon/vault-sync-deployment.yaml",
        "services/vault/k8s-auth-config-cronjob.yaml",
        "services/vault/oidc-config-cronjob.yaml",
        "services/vaultwarden/deployment.yaml"
      ]
    },
    {
      "id": "KSV-0121",
      "targets": [
        "services/logging/node-image-gc-rpi4-daemonset.yaml",
        "services/logging/node-image-prune-rpi5-daemonset.yaml",
        "services/logging/node-log-rotation-daemonset.yaml",
        "services/maintenance/disable-k3s-traefik-daemonset.yaml",
        "services/maintenance/image-sweeper-cronjob.yaml",
        "services/maintenance/metis-deployment.yaml",
        "services/maintenance/node-image-sweeper-daemonset.yaml",
        "services/maintenance/node-nofile-daemonset.yaml",
        "services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
      ]
    }
  ]
 }
--- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml
+++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml
@ -13,14 +13,14 @@ spec:
  git:
    checkout:
      ref:
-        branch: feature/ariadne
+        branch: main
    commit:
      author:
        email: ops@bstein.dev
        name: flux-bot
      messageTemplate: "chore(bstein-dev-home): automated image update"
    push:
-      branch: feature/ariadne
+      branch: main
  update:
    strategy: Setters
    path: services/bstein-dev-home
--- a/clusters/atlas/flux-system/applications/kustomization.yaml
+++ b/clusters/atlas/flux-system/applications/kustomization.yaml
@ -21,6 +21,7 @@ resources:
  - sui-metrics/kustomization.yaml
  - openldap/kustomization.yaml
  - keycloak/kustomization.yaml
  - quality/kustomization.yaml
  - oauth2-proxy/kustomization.yaml
  - mailu/kustomization.yaml
  - jenkins/kustomization.yaml
--- a/clusters/atlas/flux-system/applications/quality/kustomization.yaml
+++ b/clusters/atlas/flux-system/applications/quality/kustomization.yaml
@ -0,0 +1,35 @@
 # clusters/atlas/flux-system/applications/quality/kustomization.yaml
 # Flux Kustomization that reconciles the manifests under ./services/quality
 # (taken from the flux-system GitRepository) into the `quality` namespace.
 apiVersion: kustomize.toolkit.fluxcd.io/v1
 kind: Kustomization
 metadata:
  name: quality
  namespace: flux-system
 spec:
  # Reconcile every 10 minutes.
  interval: 10m
  path: ./services/quality
  # Objects removed from the path are also removed from the cluster.
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: quality
  # Flux Kustomizations that must be ready before this one is applied.
  dependsOn:
    - name: traefik
    - name: cert-manager
    - name: keycloak
    - name: vault
    - name: postgres
  # Readiness of this Kustomization is decided by these three Deployments.
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube-exporter
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-sonarqube
      namespace: quality
  # wait stays off so only the healthChecks above are awaited, not every
  # applied object.
  wait: false
  # Upper bound for the apply and health-check operations of one reconcile.
  timeout: 20m
--- a/dockerfiles/Dockerfile.comms-guest-tools
+++ b/dockerfiles/Dockerfile.comms-guest-tools
@ -2,4 +2,8 @@ FROM python:3.11-slim
 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
-RUN pip install --no-cache-dir requests psycopg2-binary
+RUN pip install --no-cache-dir requests psycopg2-binary \
    && groupadd --system guest-tools \
    && useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
 USER guest-tools
--- a/dockerfiles/Dockerfile.data-prepper
+++ b/dockerfiles/Dockerfile.data-prepper
@ -1,16 +1,8 @@
-FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
+# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
-
+FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
 FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
 ENV DATA_PREPPER_PATH=/usr/share/data-prepper
 RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
  && mkdir -p /var/log/data-prepper
 COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
 RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
 USER 10001
 WORKDIR /usr/share/data-prepper
 CMD ["bin/data-prepper"]
--- a/dockerfiles/Dockerfile.livekit-token-vault
+++ b/dockerfiles/Dockerfile.livekit-token-vault
@ -1,10 +1,13 @@
 FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
 FROM alpine:3.20
-RUN apk add --no-cache ca-certificates
+RUN apk add --no-cache ca-certificates \
    && addgroup -S livekit-token \
    && adduser -S -D -H -u 65532 -G livekit-token livekit-token
 COPY --from=base /lk-jwt-service /lk-jwt-service
 COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
 RUN chmod 0755 /entrypoint.sh
 USER livekit-token
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["/lk-jwt-service"]
--- a/dockerfiles/Dockerfile.monero-p2pool
+++ b/dockerfiles/Dockerfile.monero-p2pool
@ -29,10 +29,12 @@ FROM ${DEBIAN_IMAGE}
 RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends ca-certificates; \
-    update-ca-certificates; rm -rf /var/lib/apt/lists/*
+    update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
    groupadd --system p2pool; \
    useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
 COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
 RUN /usr/local/bin/p2pool --version || true
 EXPOSE 3333
 USER p2pool
 ENTRYPOINT ["/usr/local/bin/p2pool"]
--- a/dockerfiles/Dockerfile.monero-wallet-rpc
+++ b/dockerfiles/Dockerfile.monero-wallet-rpc
@ -26,9 +26,12 @@ RUN set -eux; \
    curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
    tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
    install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
-    rm -f /opt/monero/monero.tar.bz2
+    rm -f /opt/monero/monero.tar.bz2; \
    groupadd --system monero; \
    useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
 ENV PATH="/usr/local/bin:/usr/bin:/bin"
 RUN /usr/local/bin/monero-wallet-rpc --version || true
 EXPOSE 18083
 USER monero
--- a/dockerfiles/Dockerfile.monerod
+++ b/dockerfiles/Dockerfile.monerod
@ -23,10 +23,14 @@ RUN set -eux; \
    mkdir -p /opt/monero; \
    tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
    rm -f /tmp/monero.tar.bz2; \
    groupadd --system monero; \
    useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
    mkdir -p /data; \
    chown monero:monero /data; \
    chmod 0770 /data
 ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
    PATH="/opt/monero:${PATH}"
 USER monero
 CMD ["/opt/monero/monerod", "--version"]
--- a/dockerfiles/Dockerfile.oauth2-proxy-vault
+++ b/dockerfiles/Dockerfile.oauth2-proxy-vault
@ -1,10 +1,13 @@
 FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
 FROM alpine:3.20
-RUN apk add --no-cache ca-certificates
+RUN apk add --no-cache ca-certificates \
    && addgroup -S oauth2-proxy \
    && adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
 COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
 COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
 RUN chmod 0755 /entrypoint.sh
 USER oauth2-proxy
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["/bin/oauth2-proxy"]
--- a/dockerfiles/Dockerfile.pegasus-vault
+++ b/dockerfiles/Dockerfile.pegasus-vault
@ -1,10 +1,13 @@
 FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
 FROM alpine:3.20
-RUN apk add --no-cache ca-certificates
+RUN apk add --no-cache ca-certificates \
    && addgroup -S pegasus \
    && adduser -S -D -H -u 65532 -G pegasus pegasus
 COPY --from=base /pegasus /pegasus
 COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
 RUN chmod 0755 /entrypoint.sh
 USER pegasus
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["/pegasus"]
--- a/dockerfiles/Dockerfile.quality-tools
+++ b/dockerfiles/Dockerfile.quality-tools
@ -0,0 +1,48 @@
 # dockerfiles/Dockerfile.quality-tools
 # CI helper image bundling sonar-scanner and Trivy (linux/arm64 binaries) with a
 # Trivy vulnerability DB baked in at build time. Runs as the unprivileged
 # `quality-tools` user (uid 65532) with /workspace as its writable work dir.
 FROM debian:bookworm-slim
 ARG SONAR_SCANNER_VERSION=8.0.1.6346
 ARG TRIVY_VERSION=0.70.0
 ENV TRIVY_CACHE_DIR=/opt/trivy-cache
 # pipefail makes a failing `curl`/`grep` on the left of a pipe abort the build.
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 # Base tooling plus the unprivileged runtime account.
 RUN apt-get update \
  && apt-get install -y --no-install-recommends \
    bash \
    ca-certificates \
    curl \
    git \
    jq \
    unzip \
  && rm -rf /var/lib/apt/lists/* \
  && groupadd --system quality-tools \
  && useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
 # sonar-scanner: download, verify against the published .sha256, unpack to /opt.
 RUN set -eux; \
  scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
  base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
  curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
  curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
  printf '%s  %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
  unzip -q "/tmp/${scanner_zip}" -d /opt; \
  ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
  rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
 # Trivy: download the release tarball and verify it against the release's
 # checksums file before anything from it lands in /usr/local/bin. The grep
 # selects the single "<sha256>  <file>" line for our tarball; a missing entry
 # or a hash mismatch fails the build.
 RUN set -eux; \
  trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
  trivy_sums="trivy_${TRIVY_VERSION}_checksums.txt"; \
  release_url="https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}"; \
  curl -fsSL "${release_url}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
  curl -fsSL "${release_url}/${trivy_sums}" -o "/tmp/${trivy_sums}"; \
  (cd /tmp && grep -F "  ${trivy_tgz}" "${trivy_sums}" | sha256sum -c -); \
  tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
  rm -f "/tmp/${trivy_tgz}" "/tmp/${trivy_sums}"; \
  trivy --version; \
  sonar-scanner -v
 # Pre-populate the Trivy DB cache and make it world-readable for the runtime
 # user, then create the writable work directory.
 RUN set -eux; \
  mkdir -p "${TRIVY_CACHE_DIR}"; \
  trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
  chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
  mkdir -p /workspace; \
  chown quality-tools:quality-tools /workspace
 WORKDIR /workspace
 USER quality-tools
--- a/infrastructure/cert-manager/helmrelease.yaml
+++ b/infrastructure/cert-manager/helmrelease.yaml
@ -33,6 +33,36 @@ spec:
      node-role.kubernetes.io/worker: "true"
    affinity:
      nodeAffinity:
        preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            preference:
              matchExpressions:
                - key: atlas.bstein.dev/spillover
                  operator: DoesNotExist
          - weight: 95
            preference:
              matchExpressions:
                - key: kubernetes.io/hostname
                  operator: NotIn
                  values:
                    - titan-13
                    - titan-15
                    - titan-17
                    - titan-19
          - weight: 90
            preference:
              matchExpressions:
                - key: hardware
                  operator: In
                  values:
                    - rpi5
          - weight: 50
            preference:
              matchExpressions:
                - key: hardware
                  operator: In
                  values:
                    - rpi4
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
@ -46,6 +76,36 @@ spec:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi5
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi4
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
@ -59,6 +119,36 @@ spec:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi5
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi4
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
--- a/infrastructure/longhorn/core/helmrelease.yaml
+++ b/infrastructure/longhorn/core/helmrelease.yaml
@ -26,6 +26,9 @@ spec:
    cleanupOnFail: true
    timeout: 15m
  values:
    global:
      nodeSelector:
        longhorn-host: "true"
    service:
      ui:
        type: NodePort
@ -78,3 +81,12 @@ spec:
          tag: v2.16.0
    defaultSettings:
      systemManagedPodsImagePullPolicy: Always
    longhornManager:
      nodeSelector:
        longhorn-host: "true"
    longhornDriver:
      nodeSelector:
        longhorn-host: "true"
    longhornUI:
      nodeSelector:
        longhorn-host: "true"
--- a/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
+++ b/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
@ -2,10 +2,11 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: longhorn-settings-ensure-4
+  name: longhorn-settings-ensure-7
  namespace: longhorn-system
 spec:
  backoffLimit: 0
  activeDeadlineSeconds: 240
  ttlSecondsAfterFinished: 3600
  template:
    spec:
--- a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh
+++ b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh
@ -4,11 +4,12 @@ set -eu
 # Longhorn blocks direct CR patches for some settings; use the internal API instead.
 api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
 curl_opts="-fsS --connect-timeout 3 --max-time 15"
 wait_for_api() {
  attempts=30
  while [ "${attempts}" -gt 0 ]; do
-    if curl -fsS "${api_base}" >/dev/null 2>&1; then
+    if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
      return 0
    fi
    attempts=$((attempts - 1))
@ -22,14 +23,14 @@ update_setting() {
  name="$1"
  value="$2"
-  current="$(curl -fsS "${api_base}/${name}" || true)"
+  current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
  if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
    echo "Setting ${name} already set."
    return 0
  fi
  echo "Setting ${name} -> ${value}"
-  curl -fsS -X PUT \
+  curl ${curl_opts} -X PUT \
    -H "Content-Type: application/json" \
    -d "{\"value\":\"${value}\"}" \
    "${api_base}/${name}" >/dev/null
@ -40,3 +41,7 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
 update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
 update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
 update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
 # Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
 update_setting replica-auto-balance "best-effort"
 update_setting concurrent-replica-rebuild-per-node-limit "2"
 update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"
--- a/infrastructure/longhorn/core/vault-sync-deployment.yaml
+++ b/infrastructure/longhorn/core/vault-sync-deployment.yaml
@ -26,6 +26,16 @@ spec:
                  - key: hardware
                    operator: In
                    values: ["rpi5", "rpi4"]
            - weight: 90
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
      containers:
        - name: sync
          image: alpine:3.20
--- a/infrastructure/traefik/deployment.yaml
+++ b/infrastructure/traefik/deployment.yaml
@ -70,6 +70,38 @@ items:
        dnsPolicy: ClusterFirst
        nodeSelector:
          node-role.kubernetes.io/worker: "true"
        affinity:
          nodeAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                - key: atlas.bstein.dev/spillover
                  operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                - key: kubernetes.io/hostname
                  operator: NotIn
                  values:
                  - titan-13
                  - titan-15
                  - titan-17
                  - titan-19
            - weight: 90
              preference:
                matchExpressions:
                - key: hardware
                  operator: In
                  values:
                  - rpi5
            - weight: 50
              preference:
                matchExpressions:
                - key: hardware
                  operator: In
                  values:
                  - rpi4
        restartPolicy: Always
        schedulerName: default-scheduler
        serviceAccount: atlas-traefik-ingress-controller
--- a/infrastructure/vault-injector/helmrelease.yaml
+++ b/infrastructure/vault-injector/helmrelease.yaml
@ -41,3 +41,12 @@ spec:
        failurePolicy: Ignore
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values: ["titan-13", "titan-15", "titan-17", "titan-19"]
--- a/scripts/dashboards_render_atlas.py
+++ b/scripts/dashboards_render_atlas.py
--- a/scripts/tests/test_dashboards_render_atlas.py
+++ b/scripts/tests/test_dashboards_render_atlas.py
@ -4,13 +4,21 @@ import pathlib
 def load_module():
    path = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
-    spec = importlib.util.spec_from_file_location("dashboards_render_atlas", path)
+    spec = importlib.util.spec_from_file_location("scripts.dashboards_render_atlas", path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(module)
    return module
 def flatten_panels(panels):
    """Return each panel followed by its directly nested ``panels`` children (one level deep)."""
    collected = []
    for entry in panels:
        # A panel without a "panels" key contributes only itself.
        collected += [entry, *entry.get("panels", [])]
    return collected
 def test_table_panel_options_and_filterable():
    mod = load_module()
    panel = mod.table_panel(
@ -56,3 +64,71 @@ def test_render_configmap_writes(tmp_path):
    content = (tmp_path / "cm.yaml").read_text()
    assert "kind: ConfigMap" in content
    assert f"{uid}.json" in content
 def test_testing_suite_variable_uses_canonical_values_only():
    """The suite variable is built from PLATFORM_TEST_SUITE_NAMES and contains none of the hyphenated aliases."""
    mod = load_module()
    suite_var = mod.testing_suite_variable()
    canonical = mod.PLATFORM_TEST_SUITE_NAMES
    assert suite_var["allValue"] == "|".join(canonical)
    # None of these hyphenated names may appear in the query or in the "all" matcher.
    for legacy in ("bstein-home", "data-prepper", "titan-iac", "pegasus-health"):
        assert legacy not in suite_var["query"]
        assert legacy not in suite_var["allValue"]
    offered = [option["value"] for option in suite_var["options"]]
    assert offered == canonical
 def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    """The jobs dashboard exposes gate-health and run-reliability panels under their own titles."""
    mod = load_module()
    by_title = {}
    for panel in flatten_panels(mod.build_jobs_dashboard()["panels"]):
        by_title[panel["title"]] = panel
    for expected_title in (
        "Current Gate Health by Suite",
        "Run Reliability by Suite (24h)",
        "Run Reliability History by Suite",
    ):
        assert expected_title in by_title
    for absent_title in ("Failures by Suite (24h)", "Success Rate by Suite (24h)"):
        assert absent_title not in by_title
    gate_expr = by_title["Current Gate Health by Suite"]["targets"][0]["expr"]
    for fragment in ('check)', 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"'):
        assert fragment in gate_expr
    reliability = by_title["Run Reliability by Suite (24h)"]
    reliability_query = reliability["targets"][0]["expr"]
    for fragment in ("platform_quality_gate_runs_total", "> 0", "- 1"):
        assert fragment in reliability_query
    # The only value mapping renders -1 as the text "no runs".
    expected_mappings = [{"type": "value", "options": {"-1": {"text": "no runs"}}}]
    assert reliability["fieldConfig"]["defaults"]["mappings"] == expected_mappings
 def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
    """Top level holds 11 single-target panels plus 5 collapsed rows that carry the drilldown panels."""
    mod = load_module()
    top_level = mod.build_jobs_dashboard()["panels"]
    rows, visible = [], []
    for panel in top_level:
        (rows if panel["type"] == "row" else visible).append(panel)
    nested_titles = {child["title"] for row in rows for child in row.get("panels", [])}
    assert len(top_level) == 16
    assert len(visible) == 11
    target_total = 0
    for panel in visible:
        target_total += len(panel.get("targets", []))
    assert target_total == 11
    row_titles = [row["title"] for row in rows]
    assert row_titles == [
        "Reliability And Run History",
        "Failure Trends By Check",
        "Success Trends By Check",
        "Test Drilldowns And Problem Tests",
        "Telemetry Completeness, SonarQube, And Branches",
    ]
    for row in rows:
        assert row["collapsed"]
    for nested_title in (
        "Failure Trend: Coverage",
        "Success Trend: Supply Chain",
        "Selected Test Pass Rate History",
        "Missing Coverage Metrics by Suite",
        "SonarQube API Up",
    ):
        assert nested_title in nested_titles
--- a/scripts/tests/test_mailu_sync.py
+++ b/scripts/tests/test_mailu_sync.py
@ -138,6 +138,100 @@ def test_kc_get_users_paginates(monkeypatch):
    assert sync.SESSION.calls == 1
 def test_kc_get_users_fetches_second_page_after_full_batch(monkeypatch):
    """A first page of 200 users is followed by a second request with ``first=200``."""
    sync = load_sync_module(monkeypatch)
    class _PagedSession:
        """Fake session: 200 users on the first call, one trailing user on every later call."""
        def __init__(self):
            self.calls = 0
            self.first_params = []
        def get(self, *_, **kwargs):
            self.calls += 1
            self.first_params.append(kwargs["params"]["first"])
            if self.calls != 1:
                return _FakeResponse([{"id": "last"}])
            full_page = [{"id": f"u{i}"} for i in range(200)]
            return _FakeResponse(full_page)
    sync.SESSION = _PagedSession()
    fetched = sync.kc_get_users("tok")
    assert len(fetched) == 201
    assert sync.SESSION.first_params == [0, 200]
 def test_get_kc_token_posts_client_credentials(monkeypatch):
    """get_kc_token returns the access token and posts ``grant_type=client_credentials``."""
    sync = load_sync_module(monkeypatch)
    recorded = []
    class _TokenSession:
        """Fake session that records each POST and answers with a fixed token."""
        def post(self, url, data, timeout):
            recorded.append((url, data, timeout))
            return _FakeResponse({"access_token": "tok"})
    sync.SESSION = _TokenSession()
    token = sync.get_kc_token()
    assert token == "tok"
    _, form, _ = recorded[0]
    assert form["grant_type"] == "client_credentials"
 def test_retry_request_retries_then_succeeds(monkeypatch):
    """retry_request returns the callable's result after one failure, sleeping exactly once for 2."""
    sync = load_sync_module(monkeypatch)
    attempts = []
    sleeps = []
    def _flaky():
        # Fails on the first invocation only; every later call returns "ok".
        attempts.append(1)
        if len(attempts) == 1:
            raise sync.requests.RequestException("temporary")
        return "ok"
    # Record requested sleep durations instead of actually sleeping.
    monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
    assert sync.retry_request("request", _flaky, attempts=2) == "ok"
    assert sleeps == [2]
 def test_retry_request_reraises_final_error(monkeypatch):
    """With a single attempt allowed, the RequestException from the callable reaches the caller."""
    sync = load_sync_module(monkeypatch)
    def _no_sleep(seconds):
        return None
    def _always_fails():
        raise sync.requests.RequestException("nope")
    monkeypatch.setattr(sync.time, "sleep", _no_sleep)
    with pytest.raises(sync.requests.RequestException):
        sync.retry_request("request", _always_fails, attempts=1)
 def test_retry_db_connect_retries_then_succeeds(monkeypatch):
    """retry_db_connect returns the connection after one psycopg2 error, sleeping exactly once for 2."""
    sync = load_sync_module(monkeypatch)
    attempts = []
    sleeps = []
    def _connect(**kwargs):
        # Replacement for psycopg2.connect: first call raises, later calls return a sentinel.
        attempts.append(kwargs)
        if len(attempts) == 1:
            raise sync.psycopg2.Error("not yet")
        return "conn"
    monkeypatch.setattr(sync.psycopg2, "connect", _connect)
    # Record requested sleep durations instead of actually sleeping.
    monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
    assert sync.retry_db_connect(attempts=2) == "conn"
    assert sleeps == [2]
 def test_retry_db_connect_reraises_final_error(monkeypatch):
    """With a single attempt allowed, the psycopg2 error from connect reaches the caller."""
    sync = load_sync_module(monkeypatch)
    def _refuse(**kwargs):
        raise sync.psycopg2.Error("down")
    def _no_sleep(seconds):
        return None
    monkeypatch.setattr(sync.psycopg2, "connect", _refuse)
    monkeypatch.setattr(sync.time, "sleep", _no_sleep)
    with pytest.raises(sync.psycopg2.Error):
        sync.retry_db_connect(attempts=1)
 def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
    sync = load_sync_module(monkeypatch)
    executed = []
@ -166,6 +260,87 @@ def test_ensure_mailu_user_upserts(monkeypatch):
    assert captured["password"] != "pw"
 def test_attribute_and_email_helpers(monkeypatch):
    """Spot-check get_attribute_value, mailu_enabled and resolve_mailu_email on small inputs."""
    sync = load_sync_module(monkeypatch)
    attr_value = sync.get_attribute_value
    # List attributes yield their first element, empty lists yield None, scalars pass through.
    assert attr_value({"x": ["first", "second"]}, "x") == "first"
    assert attr_value({"x": []}, "x") is None
    assert attr_value({"x": "value"}, "x") == "value"
    enabled = sync.mailu_enabled
    assert enabled({"mailu_email": ["legacy@example.com"]}) is True
    assert enabled({"mailu_enabled": ["off"]}) is False
    resolve = sync.resolve_mailu_email
    same_domain = {"username": "fallback", "email": "user@example.com"}
    other_domain = {"username": "fallback", "email": "user@other.com"}
    assert resolve(same_domain, {}) == "user@example.com"
    assert resolve(other_domain, {}) == "fallback@example.com"
 def test_safe_update_payload_filters_fields(monkeypatch):
    """_safe_update_payload drops unknown keys, non-string required actions and non-dict attributes."""
    sync = load_sync_module(monkeypatch)
    raw_user = {
        "username": "user",
        "enabled": True,
        "email": "user@example.com",
        "emailVerified": False,
        "firstName": "User",
        "lastName": "Example",
        "requiredActions": ["UPDATE_PASSWORD", 7],
        "attributes": "not-a-dict",
        "ignored": "value",
    }
    # Expected result: pass-through fields unchanged, the two sanitized fields replaced, "ignored" removed.
    expected = dict(raw_user, requiredActions=["UPDATE_PASSWORD"], attributes={})
    del expected["ignored"]
    assert sync._safe_update_payload(raw_user) == expected
 def test_ensure_system_mailboxes_handles_configurations(monkeypatch, capsys):
    """Cover ensure_system_mailboxes with a missing password, then with one success and one failure."""
    sync = load_sync_module(monkeypatch)
    ensured = []
    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", ["postmaster@example.com", "abuse"])
    # Phase 1: an empty system password should only produce the "missing" message.
    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "")
    sync.ensure_system_mailboxes(object())
    assert "MAILU_SYSTEM_PASSWORD is missing" in capsys.readouterr().out
    def _ensure(cursor, email, password, display_name):
        # Replacement for ensure_mailu_user: records the call and fails for the "abuse" entry.
        ensured.append((email, password, display_name))
        if email == "abuse":
            raise RuntimeError("boom")
    # Phase 2: with a password set, both entries are attempted even though the second one raises.
    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "pw")
    monkeypatch.setattr(sync, "ensure_mailu_user", _ensure)
    sync.ensure_system_mailboxes(object())
    out = capsys.readouterr().out
    assert ensured == [
        ("postmaster@example.com", "pw", "postmaster"),
        ("abuse", "pw", "abuse"),
    ]
    assert "Ensured system mailbox for postmaster@example.com" in out
    assert "Failed to ensure system mailbox abuse" in out
 def test_main_exits_without_users_or_system_mailboxes(monkeypatch, capsys):
    """With no system users and an empty user list, main() prints the "No users found" message."""
    sync = load_sync_module(monkeypatch)
    overrides = {
        "MAILU_SYSTEM_USERS": [],
        "get_kc_token": lambda: "tok",
        "kc_get_users": lambda token: [],
    }
    for attr_name, replacement in overrides.items():
        monkeypatch.setattr(sync, attr_name, replacement)
    sync.main()
    printed = capsys.readouterr().out
    assert "No users found; exiting." in printed
 def test_main_generates_password_and_upserts(monkeypatch):
    sync = load_sync_module(monkeypatch)
    monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
--- a/scripts/tests/test_mailu_sync_listener.py
+++ b/scripts/tests/test_mailu_sync_listener.py
@ -0,0 +1,134 @@
 import importlib.util
 import io
 import pathlib
 import types
 def load_listener_module(monkeypatch):
    """Load services/mailu/scripts/mailu_sync_listener.py as a fresh module with the wait timeout env var set to "0"."""
    monkeypatch.setenv("MAILU_SYNC_WAIT_TIMEOUT_SEC", "0")
    base_dir = pathlib.Path(__file__).resolve().parents[2]
    script = base_dir.joinpath("services", "mailu", "scripts", "mailu_sync_listener.py")
    spec = importlib.util.spec_from_file_location("mailu_sync_listener_testmod", script)
    listener = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(listener)
    return listener
 def _handler_for(listener, body):
    handler = listener.Handler.__new__(listener.Handler)
    raw = body if isinstance(body, bytes) else body.encode()
    handler.headers = {"Content-Length": str(len(raw))}
    handler.rfile = io.BytesIO(raw)
    handler.responses = []
    handler.headers_ended = 0
    handler.send_response = lambda code: handler.responses.append(code)
    handler.end_headers = lambda: setattr(handler, "headers_ended", handler.headers_ended + 1)
    return handler
 def test_listener_run_sync_blocking_updates_state(monkeypatch):
    """_run_sync_blocking returns the subprocess return code and records it in module state."""
    listener = load_listener_module(monkeypatch)
    # Freeze the module-level clock and make the subprocess report exit code 3.
    monkeypatch.setattr(listener, "time", lambda: 42.0)
    monkeypatch.setattr(
        listener.subprocess,
        "run",
        lambda command, check: types.SimpleNamespace(returncode=3),
    )
    assert listener._run_sync_blocking() == 3
    assert listener.last_rc == 3
    assert listener.last_run == 42.0
    assert listener.sync_done.is_set()
    # With a sync already flagged as running the call returns 0
    # (presumably without launching another run; that is not asserted here).
    listener.sync_running = True
    assert listener._run_sync_blocking() == 0
 def test_listener_trigger_sync_async_honors_running_and_debounce(monkeypatch):
    """_trigger_sync_async refuses while a sync runs or ran 5s ago, unless ``force=True``."""
    listener = load_listener_module(monkeypatch)
    starts = []
    class _Thread:
        # Stand-in for threading.Thread that records start() calls instead of spawning a thread.
        def __init__(self, target, daemon):
            self.target = target
            self.daemon = daemon
        def start(self):
            starts.append((self.target, self.daemon))
    monkeypatch.setattr(listener.threading, "Thread", _Thread)
    monkeypatch.setattr(listener, "time", lambda: 100.0)
    # A sync already in progress blocks a new trigger.
    listener.sync_running = True
    assert listener._trigger_sync_async() is False
    # Last run 5s before the patched clock: a plain trigger is refused, a forced one proceeds.
    listener.sync_running = False
    listener.last_run = 95.0
    assert listener._trigger_sync_async() is False
    assert listener._trigger_sync_async(force=True) is True
    # The forced trigger started a thread created with daemon=True.
    assert starts and starts[0][1] is True
 def test_listener_post_rejects_invalid_json(monkeypatch):
    """A body that is not valid JSON yields a single 400 response with headers ended once."""
    module = load_listener_module(monkeypatch)
    request = _handler_for(module, b"{not-json")
    request.do_POST()
    observed = (request.responses, request.headers_ended)
    assert observed == ([400], 1)
 def test_listener_post_triggers_async_without_wait(monkeypatch):
    """A POST with ``force`` but no ``wait`` triggers a forced async sync and answers 202."""
    listener = load_listener_module(monkeypatch)
    called = []
    # list.append returns None, so "append(...) or True" records the flag and reports success.
    monkeypatch.setattr(listener, "_trigger_sync_async", lambda force=False: called.append(force) or True)
    handler = _handler_for(listener, '{"force": true}')
    handler.do_POST()
    assert called == [True]
    assert handler.responses == [202]
 def test_listener_post_wait_returns_success_or_failure(monkeypatch):
    """With ``wait`` set, the response is 200 when last_rc is 0 and 500 when last_rc is 2."""
    listener = load_listener_module(monkeypatch)
    called = []
    # list.append returns None, so "append(...) or True" records the flag and reports success.
    monkeypatch.setattr(listener, "_trigger_sync_async", lambda force=False: called.append(force) or True)
    listener.sync_running = False
    listener.last_rc = 0
    handler = _handler_for(listener, '{"wait": true, "force": true}')
    handler.do_POST()
    assert called == [True]
    assert handler.responses == [200]
    # Same flow with a non-zero last return code, using a fresh handler.
    listener.last_rc = 2
    handler = _handler_for(listener, '{"wait": true}')
    handler.do_POST()
    assert handler.responses == [500]
 def test_listener_post_wait_keeps_running_request_successful(monkeypatch):
    """A waiting POST made while ``sync_running`` is True is answered with 200."""
    listener = load_listener_module(monkeypatch)
    # load_listener_module sets MAILU_SYNC_WAIT_TIMEOUT_SEC to "0" before loading the module.
    listener.sync_running = True
    handler = _handler_for(listener, '{"wait": true}')
    handler.do_POST()
    assert handler.responses == [200]
 def test_listener_log_message_is_quiet(monkeypatch):
    """Handler.log_message returns None when called on an instance built without ``__init__``."""
    module = load_listener_module(monkeypatch)
    handler_cls = module.Handler
    bare_handler = handler_cls.__new__(handler_cls)
    outcome = bare_handler.log_message("ignored %s", "value")
    assert outcome is None
--- a/services/ai-llm/deployment.yaml
+++ b/services/ai-llm/deployment.yaml
@ -5,7 +5,7 @@ metadata:
  name: ollama
  namespace: ai
 spec:
-  replicas: 1
+  replicas: 0
  revisionHistoryLimit: 2
  strategy:
    type: RollingUpdate
@ -21,7 +21,7 @@ spec:
        app: ollama
      annotations:
        ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-22/24)
+        ai.bstein.dev/gpu: GPU pool (titan-20/21)
        ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
    spec:
      affinity:
@ -32,13 +32,13 @@ spec:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
-                      - titan-22
+                      - titan-20
-                      - titan-24
+                      - titan-21
      runtimeClassName: nvidia
      volumes:
        - name: models
          persistentVolumeClaim:
-            claimName: ollama-models
+            claimName: ollama-models-asteria
      initContainers:
        - name: warm-model
          image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
--- a/services/ai-llm/pvc.yaml
+++ b/services/ai-llm/pvc.yaml
@ -2,12 +2,12 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: ollama-models
+  name: ollama-models-asteria
  namespace: ai
 spec:
  accessModes:
-    - ReadWriteOnce
+    - ReadWriteMany
  resources:
    requests:
      storage: 30Gi
-  storageClassName: astreae
+  storageClassName: asteria
--- a/services/bstein-dev-home/backend-deployment.yaml
+++ b/services/bstein-dev-home/backend-deployment.yaml
@ -49,6 +49,15 @@ spec:
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values: ["titan-13", "titan-15", "titan-17", "titan-19"]
      imagePullSecrets:
        - name: harbor-regcred
      containers:
--- a/services/bstein-dev-home/chat-ai-gateway-deployment.yaml
+++ b/services/bstein-dev-home/chat-ai-gateway-deployment.yaml
@ -38,6 +38,36 @@ spec:
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
      containers:
        - name: gateway
          image: python:3.11-slim
--- a/services/bstein-dev-home/frontend-deployment.yaml
+++ b/services/bstein-dev-home/frontend-deployment.yaml
@ -26,7 +26,7 @@ spec:
          imagePullPolicy: Always
          ports:
            - name: http
-              containerPort: 80
+              containerPort: 8080
          readinessProbe:
            httpGet:
              path: /
--- a/services/bstein-dev-home/frontend-service.yaml
+++ b/services/bstein-dev-home/frontend-service.yaml
@ -10,4 +10,4 @@ spec:
  ports:
    - name: http
      port: 80
-      targetPort: 80
+      targetPort: 8080
--- a/services/bstein-dev-home/kustomization.yaml
+++ b/services/bstein-dev-home/kustomization.yaml
@ -20,9 +20,9 @@ resources:
  - ingress.yaml
 images:
  - name: registry.bstein.dev/bstein/bstein-dev-home-frontend
-    newTag: 0.1.1-120 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
+    newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
  - name: registry.bstein.dev/bstein/bstein-dev-home-backend
-    newTag: 0.1.1-123 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
+    newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
 configMapGenerator:
  - name: chat-ai-gateway
    namespace: bstein-dev-home
--- a/services/harbor/helmrelease.yaml
+++ b/services/harbor/helmrelease.yaml
@ -53,7 +53,7 @@ spec:
        registry:
          existingClaim: harbor-registry
          accessMode: ReadWriteOnce
-          size: 50Gi
+          size: 100Gi
        jobservice:
          jobLog:
            existingClaim: harbor-jobservice-logs
@ -77,6 +77,7 @@ spec:
      internal:
        nodeSelector:
          ananke.bstein.dev/harbor-bootstrap: "true"
          kubernetes.io/hostname: titan-11
        image:
          repository: registry.bstein.dev/infra/harbor-redis
          tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-redis:tag"}
@ -113,6 +114,7 @@ spec:
    core:
      nodeSelector:
        ananke.bstein.dev/harbor-bootstrap: "true"
        kubernetes.io/hostname: titan-11
      image:
        repository: registry.bstein.dev/infra/harbor-core
        tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-core:tag"}
@ -125,6 +127,10 @@ spec:
      podAnnotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "harbor"
        vault.hashicorp.com/agent-requests-cpu: "25m"
        vault.hashicorp.com/agent-limits-cpu: "100m"
        vault.hashicorp.com/agent-requests-mem: "32Mi"
        vault.hashicorp.com/agent-limits-mem: "128Mi"
        vault.hashicorp.com/agent-inject-secret-harbor-core-env.sh: "kv/data/atlas/harbor/harbor-core"
        vault.hashicorp.com/agent-inject-template-harbor-core-env.sh: |
          {{ with secret "kv/data/atlas/harbor/harbor-core" }}
@ -174,6 +180,7 @@ spec:
    jobservice:
      nodeSelector:
        ananke.bstein.dev/harbor-bootstrap: "true"
        kubernetes.io/hostname: titan-11
      image:
        repository: registry.bstein.dev/infra/harbor-jobservice
        tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-jobservice:tag"}
@ -183,6 +190,10 @@ spec:
      podAnnotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "harbor"
        vault.hashicorp.com/agent-requests-cpu: "25m"
        vault.hashicorp.com/agent-limits-cpu: "100m"
        vault.hashicorp.com/agent-requests-mem: "32Mi"
        vault.hashicorp.com/agent-limits-mem: "128Mi"
        vault.hashicorp.com/agent-inject-secret-harbor-jobservice-env.sh: "kv/data/atlas/harbor/harbor-jobservice"
        vault.hashicorp.com/agent-inject-template-harbor-jobservice-env.sh: |
          {{ with secret "kv/data/atlas/harbor/harbor-core" }}
@ -216,6 +227,7 @@ spec:
    portal:
      nodeSelector:
        ananke.bstein.dev/harbor-bootstrap: "true"
        kubernetes.io/hostname: titan-11
      image:
        repository: registry.bstein.dev/infra/harbor-portal
        tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-portal:tag"}
@ -243,6 +255,7 @@ spec:
    registry:
      nodeSelector:
        ananke.bstein.dev/harbor-bootstrap: "true"
        kubernetes.io/hostname: titan-11
      registry:
        image:
          repository: registry.bstein.dev/infra/harbor-registry
@ -270,6 +283,10 @@ spec:
      podAnnotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "harbor"
        vault.hashicorp.com/agent-requests-cpu: "25m"
        vault.hashicorp.com/agent-limits-cpu: "100m"
        vault.hashicorp.com/agent-requests-mem: "32Mi"
        vault.hashicorp.com/agent-limits-mem: "128Mi"
        vault.hashicorp.com/agent-inject-secret-harbor-registry-env.sh: "kv/data/atlas/harbor/harbor-registry"
        vault.hashicorp.com/agent-inject-template-harbor-registry-env.sh: |
          {{ with secret "kv/data/atlas/harbor/harbor-registry" }}
@ -321,6 +338,7 @@ spec:
    nginx:
      nodeSelector:
        ananke.bstein.dev/harbor-bootstrap: "true"
        kubernetes.io/hostname: titan-11
      image:
        repository: registry.bstein.dev/infra/harbor-nginx
        tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-nginx:tag"}
--- a/services/harbor/pvc.yaml
+++ b/services/harbor/pvc.yaml
@ -8,7 +8,7 @@ spec:
  accessModes: [ "ReadWriteOnce" ]
  resources:
    requests:
-      storage: 50Gi
+      storage: 100Gi
  storageClassName: astreae
 ---
 apiVersion: v1
--- a/services/jellyfin/deployment.yaml
+++ b/services/jellyfin/deployment.yaml
@ -77,23 +77,26 @@ spec:
              mountPath: /config
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: longhorn-host
                    operator: In
                    values:
                      - "true"
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values:
                      - "true"
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
-                  - key: kubernetes.io/hostname
+                  - key: hardware
                    operator: In
                    values:
-                      - titan-22
+                      - rpi5
            - weight: 80
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: In
                    values:
                      - titan-20
                      - titan-21
            - weight: 60
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
@ -105,7 +108,6 @@ spec:
        fsGroup: 65532
        fsGroupChangePolicy: OnRootMismatch
        runAsGroup: 65532
      runtimeClassName: nvidia
      containers:
        - name: jellyfin
          image: docker.io/jellyfin/jellyfin:10.11.5
@ -118,8 +120,6 @@ spec:
            - name: http
              containerPort: 8096
          env:
            - name: NVIDIA_DRIVER_CAPABILITIES
              value: "compute,video,utility"
            - name: JELLYFIN_PublishedServerUrl
              value: "https://stream.bstein.dev"
            - name: PUID
@ -131,12 +131,7 @@ spec:
            - name: VAULT_COPY_FILES
              value: /vault/secrets/ldap-config.xml:/config/plugins/configurations/LDAP-Auth.xml
          resources:
            limits:
              nvidia.com/gpu.shared: 1
            #   cpu: "4"
            #   memory: 8Gi
            requests:
              nvidia.com/gpu.shared: 1
              cpu: "500m"
              memory: 1Gi
          volumeMounts:
--- a/services/jenkins/configmap-jcasc.yaml
+++ b/services/jenkins/configmap-jcasc.yaml
@ -45,6 +45,17 @@ data:
              username: "${HARBOR_ROBOT_USERNAME}"
              password: "${HARBOR_ROBOT_PASSWORD}"
              description: "Harbor robot for pipelines"
          - usernamePassword:
              scope: GLOBAL
              id: harbor-robot-streaming
              username: "${HARBOR_STREAMING_ROBOT_USERNAME}"
              password: "${HARBOR_STREAMING_ROBOT_PASSWORD}"
              description: "Harbor robot for streaming pushes"
          - string:
              scope: GLOBAL
              id: sonarqube-token
              secret: "${SONARQUBE_TOKEN}"
              description: "SonarQube token for quality-gate evidence collection"
  jobs.yaml: |
    jobs:
      - script: |
@ -203,6 +214,32 @@ data:
              }
            }
          }
          pipelineJob('arcanagon') {
            properties {
              pipelineTriggers {
                triggers {
                  scmTrigger {
                    scmpoll_spec('H/5 * * * *')
                    ignorePostCommitHooks(false)
                  }
                }
              }
            }
            definition {
              cpsScm {
                scm {
                  git {
                    remote {
                      url('https://scm.bstein.dev/bstein/arcanagon.git')
                      credentials('gitea-pat')
                    }
                    branches('*/master')
                  }
                }
                scriptPath('Jenkinsfile')
              }
            }
          }
          pipelineJob('pegasus') {
            properties {
              pipelineTriggers {
@ -425,8 +462,10 @@ data:
            - name: "default"
              namespace: "jenkins"
              workspaceVolume:
-                emptyDirWorkspaceVolume:
+                dynamicPVC:
-                  memory: false
+                  accessModes: "ReadWriteOnce"
                  requestsSize: "20Gi"
                  storageClassName: "astreae"
              containers:
              - name: "jnlp"
                args: "^${computer.jnlpmac} ^${computer.name}"
@ -444,11 +483,45 @@ data:
                workingDir: /home/jenkins/agent
              idleMinutes: 0
              instanceCap: 2147483647
-              label: "jenkins-jenkins-agent"
+              label: "jenkins-jenkins-agent "
              nodeUsageMode: "NORMAL"
              podRetention: Never
              serviceAccount: "jenkins"
              slaveConnectTimeoutStr: "100"
              yaml: |
                spec:
                  affinity:
                    nodeAffinity:
                      preferredDuringSchedulingIgnoredDuringExecution:
                        - weight: 100
                          preference:
                            matchExpressions:
                              - key: atlas.bstein.dev/spillover
                                operator: DoesNotExist
                        - weight: 95
                          preference:
                            matchExpressions:
                              - key: kubernetes.io/hostname
                                operator: NotIn
                                values:
                                  - titan-13
                                  - titan-15
                                  - titan-17
                                  - titan-19
                        - weight: 85
                          preference:
                            matchExpressions:
                              - key: hardware
                                operator: In
                                values:
                                  - rpi5
                  topologySpreadConstraints:
                    - maxSkew: 1
                      topologyKey: kubernetes.io/hostname
                      whenUnsatisfiable: ScheduleAnyway
                      labelSelector:
                        matchLabels:
                          jenkins/jenkins-jenkins-agent: "true"
              yamlMergeStrategy: override
              inheritYamlMergeStrategy: false
      slaveAgentPort: 50000
--- a/services/jenkins/deployment.yaml
+++ b/services/jenkins/deployment.yaml
@ -33,17 +33,26 @@ spec:
          {{ with secret "kv/data/atlas/jenkins/harbor-robot-creds" }}
          HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
          HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
          HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
          HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
          {{ end }}
          {{ with secret "kv/data/atlas/jenkins/harbor-streaming-robot-creds" }}
          HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
          HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
          {{ end }}
          {{ with secret "kv/data/atlas/shared/harbor-pull" }}
          {{- if and .Data.data.username .Data.data.password }}
-          HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
+          HARBOR_PULL_USERNAME={{ .Data.data.username }}
-          HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
+          HARBOR_PULL_PASSWORD={{ .Data.data.password }}
          {{- end }}
          {{ end }}
          {{ with secret "kv/data/atlas/jenkins/gitea-pat" }}
          GITEA_PAT_USERNAME={{ .Data.data.username }}
          GITEA_PAT_TOKEN={{ .Data.data.token }}
          {{ end }}
          {{ with secret "kv/data/atlas/quality/sonarqube-oidc" }}
          SONARQUBE_TOKEN={{ .Data.data.sonarqube_exporter_token }}
          {{ end }}
          {{ with secret "kv/data/atlas/jenkins/webhook-tokens" }}
          TITAN_IAC_WEBHOOK_TOKEN={{ .Data.data.titan_iac_quality_gate }}
          GIT_NOTIFY_TOKEN_BSTEIN_DEV_HOME={{ .Data.data.git_notify_bstein_dev_home }}
@ -61,6 +70,21 @@ spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
@ -79,6 +103,7 @@ spec:
            - sso.bstein.dev
      securityContext:
        fsGroup: 1000
        fsGroupChangePolicy: OnRootMismatch
      initContainers:
        - name: install-plugins
          image: jenkins/jenkins:2.528.3-jdk21
@ -155,7 +180,8 @@ spec:
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
-            failureThreshold: 20
+            timeoutSeconds: 5
            failureThreshold: 60
          volumeMounts:
            - name: jenkins-home
              mountPath: /var/jenkins_home
--- a/services/jenkins/serviceaccount.yaml
+++ b/services/jenkins/serviceaccount.yaml
@ -35,6 +35,9 @@ subjects:
  - kind: ServiceAccount
    name: jenkins
    namespace: jenkins
  # NOTE(review): adds the namespace's `default` ServiceAccount as a subject next to
  # `jenkins`, so any pod in `jenkins` that does not set serviceAccountName gets the
  # same Role. Presumably needed for agent pods without an explicit account — confirm.
  - kind: ServiceAccount
    name: default
    namespace: jenkins
 roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
@ -60,6 +63,9 @@ subjects:
  - kind: ServiceAccount
    name: jenkins
    namespace: jenkins
  # NOTE(review): this binding references a ClusterRole; adding `default` here extends
  # those permissions to every pod in `jenkins` that runs without an explicit
  # ServiceAccount. Verify the wider scope is intended.
  - kind: ServiceAccount
    name: default
    namespace: jenkins
 roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
--- a/services/jenkins/vault-sync-deployment.yaml
+++ b/services/jenkins/vault-sync-deployment.yaml
@ -18,6 +18,15 @@ spec:
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      # Soft scheduling preference (not a hard requirement): steer the sync pod away
      # from the listed hosts when any other eligible worker is available.
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
      containers:
        - name: sync
          image: alpine:3.20
--- a/services/keycloak/kustomization.yaml
+++ b/services/keycloak/kustomization.yaml
@ -24,7 +24,9 @@ resources:
  - oneoffs/logs-oidc-secret-ensure-job.yaml
  - oneoffs/metis-oidc-secret-ensure-job.yaml
  - oneoffs/soteria-oidc-secret-ensure-job.yaml
  - oneoffs/quality-oidc-secret-ensure-job.yaml
  - oneoffs/metis-ssh-keys-secret-ensure-job.yaml
  - oneoffs/metis-node-passwords-secret-ensure-job.yaml
  - oneoffs/harbor-oidc-secret-ensure-job.yaml
  - oneoffs/vault-oidc-secret-ensure-job.yaml
  - oneoffs/actual-oidc-secret-ensure-job.yaml
--- a/services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
+++ b/services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
@ -0,0 +1,110 @@
 # services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
 # One-off job for sso/metis-node-passwords-secret-ensure-4.
 # Purpose: ensure per-node Metis recovery placeholders exist in Vault.
 # Atlas/root values are preserved while intranet IPs are standardized per node.
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: metis-node-passwords-secret-ensure-4
  namespace: sso
 spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: registry.bstein.dev/bstein/kubectl:1.35.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -eu
              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-sso-secrets}"
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}"                 "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi
              ensured=0
              while read -r node intranet_ip; do
                if [ -z "${node}" ] || [ -z "${intranet_ip}" ]; then
                  continue
                fi
                secret_path="kv/data/atlas/nodes/${node}"
                read_status="$(curl -sS -o /tmp/node-read.json -w "%{http_code}"                   -H "X-Vault-Token: ${vault_token}"                   "${vault_addr}/v1/${secret_path}" || true)"
                if [ "${read_status}" = "200" ]; then
                  atlas_password="$(jq -r '.data.data.atlas_password // empty' /tmp/node-read.json)"
                  root_password="$(jq -r '.data.data.root_password // empty' /tmp/node-read.json)"
                elif [ "${read_status}" = "404" ]; then
                  atlas_password=""
                  root_password=""
                else
                  echo "Vault read failed for ${node} (status ${read_status})" >&2
                  cat /tmp/node-read.json >&2 || true
                  exit 1
                fi
                payload="$(jq -nc                   --arg atlas_password "${atlas_password}"                   --arg root_password "${root_password}"                   --arg intranet_ip "${intranet_ip}"                   '{data:{atlas_password:$atlas_password,root_password:$root_password,intranet_ip:$intranet_ip}}')"
                write_status="$(curl -sS -o /tmp/node-write.json -w "%{http_code}" -X POST                   -H "X-Vault-Token: ${vault_token}"                   -H 'Content-Type: application/json'                   -d "${payload}"                   "${vault_addr}/v1/${secret_path}")"
                if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
                  echo "Vault write failed for ${node} (status ${write_status})" >&2
                  cat /tmp/node-write.json >&2 || true
                  exit 1
                fi
                ensured=$((ensured + 1))
                echo "Ensured node secret placeholder for ${node} (${intranet_ip})"
              done <<'EOF_NODES'
              titan-jh 192.168.22.8
              titan-db 192.168.22.10
              titan-0a 192.168.22.11
              titan-0b 192.168.22.12
              titan-0c 192.168.22.13
              titan-20 192.168.22.20
              titan-21 192.168.22.21
              titan-22 192.168.22.22
              titan-23 192.168.22.23
              titan-24 192.168.22.26
              titan-04 192.168.22.30
              titan-05 192.168.22.31
              titan-06 192.168.22.32
              titan-07 192.168.22.33
              titan-08 192.168.22.34
              titan-09 192.168.22.35
              titan-10 192.168.22.36
              titan-11 192.168.22.37
              titan-12 192.168.22.40
              titan-13 192.168.22.41
              titan-14 192.168.22.42
              titan-15 192.168.22.43
              titan-16 192.168.22.44
              titan-17 192.168.22.45
              titan-18 192.168.22.46
              titan-19 192.168.22.47
              EOF_NODES
              echo "Ensured ${ensured} Metis node placeholders in Vault"
--- a/services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml
+++ b/services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml
@ -73,7 +73,7 @@ spec:
              CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
-                create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
+                create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  -H 'Content-Type: application/json' \
@ -121,7 +121,7 @@ spec:
                fi
              fi
-              update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
+              update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
              status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                -H 'Content-Type: application/json' \
--- a/services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml
+++ b/services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml
@ -0,0 +1,198 @@
 # services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml
 # One-off job for sso/quality-oidc-secret-ensure-1.
 # Purpose: ensure the SonarQube oauth2-proxy OIDC client and Vault secret exist.
 # Keep this completed Job around; bump the suffix if it ever needs to be rerun.
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: quality-oidc-secret-ensure-1
  namespace: sso
 spec:
  backoffLimit: 0
  template:
    metadata:
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "sso-secrets"
        vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
        vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
          {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
          export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
          {{ end }}
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail
              . /vault/secrets/keycloak-admin-env.sh
              KC_URL="http://keycloak.sso.svc.cluster.local"
              ACCESS_TOKEN=""
              # Request a master-realm admin token with the password grant. Up to five
              # attempts are made, sleeping attempt*2 seconds after each failure.
              for attempt in 1 2 3 4 5; do
                # Credentials come from the sourced keycloak-admin-env.sh and may contain
                # characters such as '&', '+' or '%'. --data-urlencode escapes the value
                # so the form body is not corrupted (plain -d sends it verbatim).
                TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \
                  -H 'Content-Type: application/x-www-form-urlencoded' \
                  -d "grant_type=password" \
                  -d "client_id=admin-cli" \
                  --data-urlencode "username=${KEYCLOAK_ADMIN}" \
                  --data-urlencode "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)"
                # jq prints the literal string "null" when the field is absent, so both
                # empty and "null" are treated as failure below.
                ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)"
                if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then
                  break
                fi
                echo "Keycloak token request failed (attempt ${attempt})" >&2
                sleep $((attempt * 2))
              done
              # Abort the Job if no attempt produced a usable token.
              if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
                echo "Failed to fetch Keycloak admin token" >&2
                exit 1
              fi
              CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
              CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                create_payload='{"clientId":"sonarqube","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  -H 'Content-Type: application/json' \
                  -d "${create_payload}" \
                  "$KC_URL/admin/realms/atlas/clients")"
                if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then
                  echo "Keycloak client create failed (status ${status})" >&2
                  exit 1
                fi
                CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
                CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              fi
              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                echo "Keycloak client sonarqube not found" >&2
                exit 1
              fi
              SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)"
              if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
                echo "Keycloak client scope groups not found" >&2
                exit 1
              fi
              DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)"
              OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)"
              if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \
                && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                  status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                    -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                    "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                  if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                    echo "Failed to attach groups client scope to sonarqube (status ${status})" >&2
                    exit 1
                  fi
                fi
              fi
              update_payload='{"enabled":true,"clientId":"sonarqube","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
              status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                -H 'Content-Type: application/json' \
                -d "${update_payload}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")"
              if [ "$status" != "204" ]; then
                echo "Keycloak client update failed (status ${status})" >&2
                exit 1
              fi
              CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)"
              if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then
                echo "Keycloak client secret not found" >&2
                exit 1
              fi
              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-sso-secrets}"
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}" \
                "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi
              # Read the current secret first so stored values can be carried forward.
              read_status="$(curl -sS -o /tmp/sonarqube-oidc-read.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
              COOKIE_SECRET=""
              EXISTING_DATA='{}'
              if [ "${read_status}" = "200" ]; then
                # A KV v2 write replaces the entire key set of the secret, so keep every
                # field already stored here (for example sonarqube_exporter_token, which
                # services/jenkins/deployment.yaml templates from this same path).
                EXISTING_DATA="$(jq -c '.data.data // {}' /tmp/sonarqube-oidc-read.json)"
                COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/sonarqube-oidc-read.json)"
              elif [ "${read_status}" != "404" ]; then
                # Anything other than "found" or "not yet created" is a hard failure.
                echo "Vault read failed (status ${read_status})" >&2
                cat /tmp/sonarqube-oidc-read.json >&2 || true
                exit 1
              fi
              # Reuse the stored cookie secret only when its length is 16, 24 or 32
              # characters; otherwise discard it and generate a fresh one.
              if [ -n "${COOKIE_SECRET}" ]; then
                length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')"
                if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then
                  COOKIE_SECRET=""
                fi
              fi
              if [ -z "${COOKIE_SECRET}" ]; then
                # 16 random bytes rendered as hex yield a 32-character secret.
                COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')"
              fi
              # Merge the managed OIDC fields over the existing data; managed keys win,
              # unrelated keys are preserved.
              payload="$(jq -nc \
                --argjson existing "${EXISTING_DATA}" \
                --arg client_id "sonarqube" \
                --arg client_secret "${CLIENT_SECRET}" \
                --arg cookie_secret "${COOKIE_SECRET}" \
                '{data:($existing + {client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret})}')"
              write_status="$(curl -sS -o /tmp/sonarqube-oidc-write.json -w "%{http_code}" -X POST \
                -H "X-Vault-Token: ${vault_token}" \
                -H 'Content-Type: application/json' \
                -d "${payload}" "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc")"
              if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
                echo "Vault write failed (status ${write_status})" >&2
                cat /tmp/sonarqube-oidc-write.json >&2 || true
                exit 1
              fi
              verify_status="$(curl -sS -o /tmp/sonarqube-oidc-verify.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
              if [ "${verify_status}" != "200" ]; then
                echo "Vault verify failed (status ${verify_status})" >&2
                cat /tmp/sonarqube-oidc-verify.json >&2 || true
                exit 1
              fi
              echo "SonarQube OIDC secret ready in Vault"
--- a/services/logging/Jenkinsfile.data-prepper
+++ b/services/logging/Jenkinsfile.data-prepper
@ -8,7 +8,6 @@ spec:
  restartPolicy: Never
  serviceAccountName: jenkins
  nodeSelector:
    hardware: rpi5
    node-role.kubernetes.io/worker: "true"
  containers:
    - name: git
@ -16,6 +15,11 @@ spec:
      command:
        - cat
      tty: true
    - name: quality-tools
      image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
      command:
        - cat
      tty: true
    - name: kaniko
      image: gcr.io/kaniko-project/executor:v1.23.2-debug
      command:
@ -23,7 +27,7 @@ spec:
      tty: true
      resources:
        requests:
-          cpu: "500m"
+          cpu: "100m"
          memory: "1Gi"
        limits:
          cpu: "1500m"
@ -32,15 +36,26 @@ spec:
    }
  }
  environment {
-    SUITE_NAME = 'data-prepper'
+    SUITE_NAME = 'data_prepper'
    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
    SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
    SONARQUBE_PROJECT_KEY = 'data_prepper'
    SONARQUBE_TOKEN = credentials('sonarqube-token')
    QUALITY_GATE_SONARQUBE_ENFORCE = '1'
    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
    QUALITY_GATE_IRONBANK_ENFORCE = '1'
    QUALITY_GATE_IRONBANK_REQUIRED = '1'
    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
  }
  parameters {
-    string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/monitoring/data-prepper', description: 'Docker repository for Data Prepper')
+    string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/data-prepper', description: 'Docker repository for Data Prepper')
    string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish')
    booleanParam(name: 'PUSH_IMAGE', defaultValue: false, description: 'Publish image artifacts (manual release only)')
    booleanParam(name: 'PUSH_LATEST', defaultValue: true, description: 'Also push the latest tag')
  }
  options {
    disableConcurrentBuilds()
    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
  }
  stages {
    stage('Checkout') {
      steps {
@ -49,19 +64,293 @@ spec:
        }
      }
    }
-    stage('Build & Push (optional)') {
+    stage('Collect quality evidence') {
-      when {
+      steps {
-        expression { return params.PUSH_IMAGE }
+        container('quality-tools') {
          sh '''#!/usr/bin/env bash
            set -euo pipefail
            mkdir -p build
            args=(
              "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
              "-Dsonar.login=${SONARQUBE_TOKEN}"
              "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
              "-Dsonar.sources=services/logging,dockerfiles"
              "-Dsonar.inclusions=services/logging/Jenkinsfile.data-prepper,dockerfiles/Dockerfile.data-prepper"
              "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**"
            )
            set +e
            sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
            sonar_rc=${PIPESTATUS[0]}
            sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}"
            host="${SONARQUBE_HOST_URL%/}"
            query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')"
            sonar_ok=0
            if [ -n "${SONARQUBE_TOKEN:-}" ]; then
              auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')"
              if command -v curl >/dev/null 2>&1; then
                curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
              elif command -v wget >/dev/null 2>&1; then
                wget -qO "${sonar_report}" --header="Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
              fi
            elif command -v curl >/dev/null 2>&1; then
              curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
            elif command -v wget >/dev/null 2>&1; then
              wget -qO "${sonar_report}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
            fi
            if [ "${sonar_ok}" -ne 1 ]; then
              cat > "${sonar_report}" <<EOF
 {
  "status": "ERROR",
  "error": "sonarqube query failed"
 }
 EOF
            fi
            scan_root=build/data-prepper-supply-chain-scan
            rm -rf "${scan_root}"
            mkdir -p "${scan_root}/dockerfiles" "${scan_root}/services/logging"
            cp dockerfiles/Dockerfile.data-prepper "${scan_root}/dockerfiles/Dockerfile.data-prepper"
            cp services/logging/Jenkinsfile.data-prepper "${scan_root}/services/logging/Jenkinsfile.data-prepper"
            trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL "${scan_root}"
            trivy_rc=$?
            set -e
            printf '%s\n' "${sonar_rc}" > build/sonarqube-analysis.rc
            if [ ! -s build/trivy-fs.json ]; then
              cat > build/ironbank-compliance.json <<EOF
 {"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
 EOF
              exit 0
            fi
            critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
            high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
            secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
            misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
            status=ok
            compliant=true
            if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
              status=failed
              compliant=false
            fi
            jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
              '{status:$status, compliant:$compliant, category:"image_compliance", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
          '''
        }
        container('git') {
          sh '''
            set -euo pipefail
            apk add --no-cache curl jq >/dev/null 2>&1 || true
            mkdir -p build
            sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}"
            if [ ! -f "${sonar_report}" ]; then
              if [ -n "${SONARQUBE_HOST_URL:-}" ] && [ -n "${SONARQUBE_PROJECT_KEY:-}" ]; then
                host="${SONARQUBE_HOST_URL%/}"
                query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')"
                sonar_ok=0
                if [ -n "${SONARQUBE_TOKEN:-}" ]; then
                  auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')"
                  if curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then
                    sonar_ok=1
                  fi
                else
                  if curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then
                    sonar_ok=1
                  fi
                fi
                if [ "${sonar_ok}" -ne 1 ]; then
                  cat > "${sonar_report}" <<EOF
 {
  "status": "ERROR",
  "error": "sonarqube query failed"
 }
 EOF
                fi
              else
                cat > "${sonar_report}" <<EOF
 {
  "status": "ERROR",
  "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"
 }
 EOF
              fi
            fi
            ironbank_report="${QUALITY_GATE_IRONBANK_REPORT:-build/ironbank-compliance.json}"
            if [ ! -f "${ironbank_report}" ]; then
              status="${IRONBANK_COMPLIANCE_STATUS:-unknown}"
              compliant="${IRONBANK_COMPLIANT:-}"
              if [ -n "${compliant}" ]; then
                compliant_lc="$(printf '%s' "${compliant}" | tr '[:upper:]' '[:lower:]')"
                compliant_json="null"
                case "${compliant_lc}" in
                  1|true|yes|on) compliant_json="true" ;;
                  0|false|no|off) compliant_json="false" ;;
                esac
                cat > "${ironbank_report}" <<EOF
 {
  "status": "${status}",
  "compliant": ${compliant_json},
  "note": "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
 }
 EOF
              else
                cat > "${ironbank_report}" <<EOF
 {
  "status": "${status}",
  "note": "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
 }
 EOF
              fi
            fi
          '''
        }
      }
    }
    // Packaging validation, run in the `git` container with a POSIX sh script.
    // Three checks are recorded as JUnit test cases in class data_prepper.packaging:
    //   - dockerfile_present: dockerfiles/Dockerfile.data-prepper exists and is non-empty
    //   - pipeline_config_present: services/logging/scripts/data_prepper_pipelines.yaml exists and is non-empty
    //   - logging_kustomization_includes_data_prepper: services/logging/kustomization.yaml
    //     mentions data-prepper-helmrelease.yaml
    // Outputs written under build/: junit-data-prepper.xml, test-counts.env and
    // testcase-status.env (the latter two are sourced later by the post section).
    // The stage exits non-zero when any check failed, after the files are written.
    stage('Validation tests') {
      steps {
        container('git') {
          sh '''#!/usr/bin/env sh
            set -eu
            mkdir -p build
            failures=0
            cases=""
            dockerfile_present_status="skipped"
            pipeline_config_present_status="skipped"
            logging_kustomization_includes_data_prepper_status="skipped"
            add_case() {
              name="$1"
              message="$2"
              status="passed"
              if [ -n "${message}" ]; then
                status="failed"
                failures=$((failures + 1))
                cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'"><failure message="'"${message}"'" /></testcase>'
              else
                cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'" />'
              fi
              case "${name}" in
                dockerfile_present) dockerfile_present_status="${status}" ;;
                pipeline_config_present) pipeline_config_present_status="${status}" ;;
                logging_kustomization_includes_data_prepper) logging_kustomization_includes_data_prepper_status="${status}" ;;
              esac
            }
            if [ -s dockerfiles/Dockerfile.data-prepper ]; then
              add_case "dockerfile_present" ""
            else
              add_case "dockerfile_present" "dockerfiles/Dockerfile.data-prepper is missing or empty"
            fi
            if [ -s services/logging/scripts/data_prepper_pipelines.yaml ]; then
              add_case "pipeline_config_present" ""
            else
              add_case "pipeline_config_present" "data_prepper_pipelines.yaml is missing or empty"
            fi
            kustomization_contents="$(cat services/logging/kustomization.yaml 2>/dev/null || true)"
            case "${kustomization_contents}" in
              *data-prepper-helmrelease.yaml*) add_case "logging_kustomization_includes_data_prepper" "" ;;
              *) add_case "logging_kustomization_includes_data_prepper" "services/logging/kustomization.yaml does not include data-prepper HelmRelease" ;;
            esac
            cat > build/junit-data-prepper.xml <<EOF
 <testsuite name="data_prepper.packaging" tests="3" failures="${failures}" errors="0" skipped="0">
 ${cases}
 </testsuite>
 EOF
            passed=$((3 - failures))
            cat > build/test-counts.env <<EOF
 test_passed_count=${passed}
 test_failed_count=${failures}
 test_error_count=0
 test_skipped_count=0
 EOF
            cat > build/testcase-status.env <<EOF
 dockerfile_present_status=${dockerfile_present_status}
 pipeline_config_present_status=${pipeline_config_present_status}
 logging_kustomization_includes_data_prepper_status=${logging_kustomization_includes_data_prepper_status}
 EOF
            if [ "${failures}" -ne 0 ]; then
              exit 1
            fi
          '''
        }
      }
    }
    // Gate stage: fails the build when the SonarQube quality gate or the
    // supply-chain (Iron Bank) compliance report is not passing.
    //   QUALITY_GATE_SONARQUBE_ENFORCE / QUALITY_GATE_IRONBANK_ENFORCE
    //       enable each gate (both default to enabled).
    //   QUALITY_GATE_IRONBANK_REQUIRED
    //       when disabled, a non-passing supply-chain result is only logged.
    // Reads build/sonarqube-quality-gate.json and build/ironbank-compliance.json.
    stage('Enforce quality gate') {
      steps {
        container('git') {
          sh '''
            set -euo pipefail
            apk add --no-cache jq >/dev/null 2>&1 || true
            fail=0
            # enabled VALUE: succeeds for 1/true/yes/on (case-insensitive), fails otherwise.
            enabled() {
              case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
                1|true|yes|on) return 0 ;;
                *) return 1 ;;
              esac
            }
            if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
              # "|| true": with errexit + pipefail a failing jq (missing or invalid
              # report) would otherwise abort here, before the "missing"
              # diagnostic below can be printed.
              sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)"
              [ -n "${sonar_status}" ] || sonar_status="missing"
              case "${sonar_status}" in
                ok|pass|passed|success) ;;
                *)
                  echo "sonarqube gate failed: ${sonar_status}" >&2
                  fail=1
                  ;;
              esac
            fi
            if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
              ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-1}"
              # Do not use ".compliant // empty": the jq alternative operator treats
              # false like null, which would hide an explicit compliant=false and
              # let the status field decide instead. Filter on null only.
              compliant="$(jq -r '.compliant | select(. != null)' build/ironbank-compliance.json 2>/dev/null || true)"
              supply_status=""
              if [ "${compliant}" = "true" ]; then
                supply_status="ok"
              elif [ "${compliant}" = "false" ]; then
                supply_status="failed"
              else
                # No usable boolean: fall back to the textual status fields.
                supply_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)"
              fi
              [ -n "${supply_status}" ] || supply_status="missing"
              case "${supply_status}" in
                ok|pass|passed|success|compliant) ;;
                not_applicable|na|n/a)
                  if enabled "${ironbank_required}"; then
                    echo "supply chain gate required but status=${supply_status}" >&2
                    fail=1
                  fi
                  ;;
                *)
                  if enabled "${ironbank_required}"; then
                    echo "supply chain gate failed: ${supply_status}" >&2
                    fail=1
                  else
                    echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
                  fi
                  ;;
              esac
            fi
            exit "${fail}"
          '''
        }
      }
    }
    stage('Build & Push') {
      steps {
        container('kaniko') {
-          withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
+          withCredentials([usernamePassword(credentialsId: 'harbor-robot-streaming', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
            sh '''
              set -euo pipefail
-              if [ -z "${HARBOR_REPO:-}" ]; then
+              IMAGE_TAG="${IMAGE_TAG:-2.8.0}"
-                HARBOR_REPO="registry.bstein.dev/monitoring/data-prepper"
+              PUSH_LATEST="${PUSH_LATEST:-true}"
              if [ -z "${HARBOR_REPO:-}" ] || [ "${HARBOR_REPO}" = "registry.bstein.dev/monitoring/data-prepper" ]; then
                HARBOR_REPO="registry.bstein.dev/streaming/data-prepper"
              fi
              IMAGE_TAG_SAFE="${IMAGE_TAG:-2.8.0}"
              mkdir -p /kaniko/.docker
              ref_host="$(echo "${HARBOR_REPO}" | cut -d/ -f1)"
              auth="$(printf "%s:%s" "${HARBOR_USERNAME}" "${HARBOR_PASSWORD}" | base64 | tr -d '\\n')"
@ -74,8 +363,8 @@ spec:
                }
              }
 EOF
-              dest_args="--destination ${HARBOR_REPO}:${IMAGE_TAG_SAFE}"
+              dest_args="--destination ${HARBOR_REPO}:${IMAGE_TAG}"
-              if [ "${PUSH_LATEST:-true}" = "true" ]; then
+              if [ "${PUSH_LATEST}" = "true" ]; then
                dest_args="${dest_args} --destination ${HARBOR_REPO}:latest"
              fi
              /kaniko/executor \
@ -88,32 +377,22 @@ EOF
        }
      }
    }
    // Smoke test: runs the kaniko executor against
    // dockerfiles/Dockerfile.data-prepper with the workspace as build context
    // and --no-push, so the image is built but nothing is published.
    stage('Smoke test suite') {
      steps {
        container('kaniko') {
          sh '''
            set -euo pipefail
            /kaniko/executor \
              --context "${WORKSPACE}" \
              --dockerfile "${WORKSPACE}/dockerfiles/Dockerfile.data-prepper" \
              --verbosity info \
              --no-push
          '''
        }
      }
    }
  }
  post {
-    success {
+    always {
      script {
        env.QUALITY_OUTCOME = currentBuild.currentResult == 'SUCCESS' ? 'ok' : 'failed'
      }
      container('git') {
        sh '''
          set -euo pipefail
-          apk add --no-cache curl >/dev/null 2>&1 || true
+          apk add --no-cache curl jq >/dev/null 2>&1 || true
          suite="${SUITE_NAME}"
          gateway="${PUSHGATEWAY_URL}"
          status="${QUALITY_OUTCOME:-failed}"
          fetch_counter() {
-            status="$1"
+            status_name="$1"
-            line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status}" '
+            line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status_name}" '
              /platform_quality_gate_runs_total/ {
                if (index($0, "job=\\"platform-quality-ci\\"") && index($0, "suite=\\"" suite "\\"") && index($0, "status=\\"" status "\\"")) {
                  print $2
@ -125,54 +404,130 @@ EOF
          }
          ok_count="$(fetch_counter ok)"
          failed_count="$(fetch_counter failed)"
-          ok_count=$((ok_count + 1))
+          if [ "${status}" = "ok" ]; then
-          tests_passed=1
+            ok_count=$((ok_count + 1))
-          tests_failed=0
+          else
-          cat <<METRICS | curl -fsS --data-binary @- "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" >/dev/null
+            failed_count=$((failed_count + 1))
          fi
          sonarqube_check="not_applicable"
          if [ -f build/sonarqube-quality-gate.json ]; then
            sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
            if [ -n "${sonar_status}" ]; then
              case "${sonar_status}" in
                ok|pass|passed|success) sonarqube_check="ok" ;;
                *) sonarqube_check="failed" ;;
              esac
            else
              sonarqube_check="failed"
            fi
          fi
          supply_chain_check="not_applicable"
          if [ -f build/ironbank-compliance.json ]; then
            compliant="$(jq -r '.compliant // empty' build/ironbank-compliance.json 2>/dev/null)"
            if [ "${compliant}" = "true" ]; then
              supply_chain_check="ok"
            elif [ "${compliant}" = "false" ]; then
              supply_chain_check="failed"
            else
              ironbank_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
              case "${ironbank_status}" in
                ok|pass|passed|success|compliant) supply_chain_check="ok" ;;
                "") supply_chain_check="failed" ;;
                *) supply_chain_check="failed" ;;
              esac
            fi
          fi
          gate_glue_check="ok"
          if [ "${status}" != "ok" ]; then
            gate_glue_check="failed"
          fi
          metric_branch_raw="${BRANCH_NAME:-${GIT_BRANCH:-unknown}}"
          metric_branch_raw="${metric_branch_raw#origin/}"
          metric_branch="$(printf '%s' "${metric_branch_raw}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
          metric_build_number="$(printf '%s' "${BUILD_NUMBER:-unknown}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
          metric_jenkins_job="$(printf '%s' "${JOB_NAME:-data-prepper}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
          export METRIC_SUITE="${suite}"
          export METRIC_BRANCH_RAW="${metric_branch_raw}"
          export METRIC_BUILD_NUMBER_RAW="${BUILD_NUMBER:-unknown}"
          export METRIC_JENKINS_JOB_RAW="${JOB_NAME:-data-prepper}"
          if [ ! -s build/test-counts.env ] || [ ! -s build/testcase-status.env ]; then
            cat > build/test-counts.env <<EOF
 test_passed_count=0
 test_failed_count=0
 test_error_count=0
 test_skipped_count=1
 EOF
            cat > build/testcase-status.env <<EOF
 dockerfile_present_status=skipped
 pipeline_config_present_status=skipped
 logging_kustomization_includes_data_prepper_status=skipped
 EOF
          fi
          . build/testcase-status.env
          if [ "${dockerfile_present_status}" = "skipped" ] && [ "${pipeline_config_present_status}" = "skipped" ] && [ "${logging_kustomization_includes_data_prepper_status}" = "skipped" ]; then
            cat > build/testcase-metrics.prom <<METRICS
 platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="__no_test_cases__",status="skipped"} 1
 METRICS
          else
            cat > build/testcase-metrics.prom <<METRICS
 platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::dockerfile_present",status="${dockerfile_present_status}"} 1
 platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::pipeline_config_present",status="${pipeline_config_present_status}"} 1
 platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::logging_kustomization_includes_data_prepper",status="${logging_kustomization_includes_data_prepper_status}"} 1
 METRICS
          fi
          . build/test-counts.env
          tests_check="ok"
          if [ "$((test_failed_count + test_error_count))" -gt 0 ]; then
            tests_check="failed"
          fi
          cat > build/platform-quality-metrics.prom <<METRICS
 # TYPE platform_quality_gate_runs_total counter
 platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
 platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
 # TYPE data_prepper_quality_gate_tests_total gauge
-data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${tests_passed}
+data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${test_passed_count}
-data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${tests_failed}
+data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${test_failed_count}
 data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} ${test_error_count}
 data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} ${test_skipped_count}
 # TYPE platform_quality_gate_workspace_line_coverage_percent gauge
 # No coverable project source is present in this packaging suite; report full
 # non-applicable coverage so rollups do not confuse N/A with uncovered code.
 platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 100
 # TYPE platform_quality_gate_source_lines_over_500_total gauge
 platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0
 # TYPE platform_quality_gate_build_info gauge
 platform_quality_gate_build_info{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}"} 1
 # TYPE data_prepper_quality_gate_checks_total gauge
 data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="${tests_check}"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1
 data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1
 # TYPE platform_quality_gate_test_case_result gauge
 METRICS
          cat build/testcase-metrics.prom >> build/platform-quality-metrics.prom
          push_status="$(
            curl -sS -o build/pushgateway-response.txt -w '%{http_code}' -X PUT \
              --data-binary @build/platform-quality-metrics.prom \
              "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" || true
          )"
          case "${push_status}" in
            200|202) ;;
            *)
              echo "warning: metrics push failed for suite=${suite} status=${push_status}" >&2
              cat build/pushgateway-response.txt >&2 || true
              ;;
          esac
        '''
      }
-    }
+      script {
-    failure {
+        if (fileExists('build/junit-data-prepper.xml')) {
-      container('git') {
+          echo 'JUnit XML generated and archived under build/; Jenkins junit step is not installed on this controller.'
-        sh '''
+        }
          set -euo pipefail
          apk add --no-cache curl >/dev/null 2>&1 || true
          suite="${SUITE_NAME}"
          gateway="${PUSHGATEWAY_URL}"
          fetch_counter() {
            status="$1"
            line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status}" '
              /platform_quality_gate_runs_total/ {
                if (index($0, "job=\\"platform-quality-ci\\"") && index($0, "suite=\\"" suite "\\"") && index($0, "status=\\"" status "\\"")) {
                  print $2
                  exit
                }
              }
            ' || true)"
            [ -n "${line}" ] && printf '%s\n' "${line}" || printf '0\n'
          }
          ok_count="$(fetch_counter ok)"
          failed_count="$(fetch_counter failed)"
          failed_count=$((failed_count + 1))
          tests_passed=0
          tests_failed=1
          cat <<METRICS | curl -fsS --data-binary @- "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" >/dev/null
 # TYPE platform_quality_gate_runs_total counter
 platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
 platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
 # TYPE data_prepper_quality_gate_tests_total gauge
 data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${tests_passed}
 data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${tests_failed}
 METRICS
        '''
      }
      archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
    }
  }
 }
--- a/services/logging/fluent-bit-helmrelease.yaml
+++ b/services/logging/fluent-bit-helmrelease.yaml
@ -44,8 +44,7 @@ spec:
          path: /var/log/journal
      - name: fluentbit-state
        emptyDir:
-          medium: Memory
-          sizeLimit: 64Mi
+          sizeLimit: 1Gi
    extraVolumeMounts:
      - name: runlogjournal
        mountPath: /run/log/journal
--- a/services/logging/node-log-rotation-daemonset.yaml
+++ b/services/logging/node-log-rotation-daemonset.yaml
@ -12,6 +12,8 @@ spec:
    type: RollingUpdate
  template:
    metadata:
      annotations:
        logging.bstein.dev/node-log-rotation-rev: "2026-04-27-3"
      labels:
        app: node-log-rotation
    spec:
--- a/services/logging/scripts/node_log_rotation.sh
+++ b/services/logging/scripts/node_log_rotation.sh
@ -99,4 +99,24 @@ if [ "${changed}" -eq 1 ]; then
  fi
 fi
-sleep infinity
+trim_constrained_pod_logs() {
  # Reclaim space on the host log volumes listed below once they are at least
  # 75% full (per the capacity column of `df -P`):
  #   - files under <base>/pods named like '[1-9]*.log' or '*.log.20*' and
  #     larger than 1M are truncated to zero length (presumably rotated
  #     container logs; the pattern does not match 0.log);
  #   - symlinks under <base>/containers whose target no longer resolves are
  #     deleted.
  # Volumes without a pods/ directory are skipped. Errors from find are ignored.
  local base usage
  for base in /host/mnt/astraios/var/log /host/var/log.hdd; do
    if [ ! -d "${base}/pods" ]; then
      continue
    fi
    usage="$(df -P "${base}" | awk 'NR==2 {gsub(/%/, "", $5); print $5}')"
    # Fail closed: an empty or non-numeric capacity (df can print "-") would
    # make the numeric test below error out and evaluate false, which would
    # fall through to truncating logs. Skip the volume instead.
    case "${usage}" in
      ''|*[!0-9]*) continue ;;
    esac
    if [ "${usage}" -lt 75 ]; then
      continue
    fi
    find "${base}/pods" -type f \( -name '[1-9]*.log' -o -name '*.log.20*' \) -size +1M -print -exec truncate -s 0 {} \; 2>/dev/null || true
    if [ -d "${base}/containers" ]; then
      find "${base}/containers" -xtype l -print -delete 2>/dev/null || true
    fi
  done
 }
 while true; do
  trim_constrained_pod_logs
  sleep 600
 done
--- a/services/mailu/helmrelease.yaml
+++ b/services/mailu/helmrelease.yaml
@ -764,6 +764,15 @@ spec:
              spec:
                template:
                  spec:
                    affinity:
                      nodeAffinity:
                        preferredDuringSchedulingIgnoredDuringExecution:
                          - weight: 100
                            preference:
                              matchExpressions:
                                - key: kubernetes.io/hostname
                                  operator: NotIn
                                  values: ["titan-13", "titan-15", "titan-17", "titan-19"]
                    containers:
                      - name: tika
                        env:
--- a/services/mailu/scripts/mailu_sync_listener.py
+++ b/services/mailu/scripts/mailu_sync_listener.py
@ -1,3 +1,5 @@
 """HTTP debounce wrapper for triggering the Mailu Keycloak sync job."""
 import http.server
 import json
 import os
--- a/services/maintenance/ariadne-deployment.yaml
+++ b/services/maintenance/ariadne-deployment.yaml
@ -106,6 +106,36 @@ spec:
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
      containers:
        - name: ariadne
          image: registry.bstein.dev/bstein/ariadne:latest
--- a/services/maintenance/image.yaml
+++ b/services/maintenance/image.yaml
@ -36,11 +36,29 @@ spec:
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
-  name: metis
+  name: metis-amd64
  namespace: maintenance
 spec:
  imageRepositoryRef:
    name: metis
  filterTags:
    pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
    extract: '$version'
  policy:
    semver:
      range: ">=0.1.0-0"
 ---
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
  name: metis-arm64
  namespace: maintenance
 spec:
  imageRepositoryRef:
    name: metis
  filterTags:
    pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
    extract: '$version'
  policy:
    semver:
      range: ">=0.1.0-0"
@ -59,11 +77,29 @@ spec:
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
-  name: metis-sentinel
+  name: metis-sentinel-amd64
  namespace: maintenance
 spec:
  imageRepositoryRef:
    name: metis-sentinel
  filterTags:
    pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
    extract: '$version'
  policy:
    semver:
      range: ">=0.1.0-0"
 ---
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
  name: metis-sentinel-arm64
  namespace: maintenance
 spec:
  imageRepositoryRef:
    name: metis-sentinel
  filterTags:
    pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
    extract: '$version'
  policy:
    semver:
      range: ">=0.1.0-0"
--- a/services/maintenance/kustomization.yaml
+++ b/services/maintenance/kustomization.yaml
@ -6,8 +6,8 @@ resources:
  - image.yaml
  - secretproviderclass.yaml
  - metis-configmap.yaml
  - soteria-configmap.yaml
  - metis-data-pvc.yaml
  - soteria-configmap.yaml
  - vault-serviceaccount.yaml
  - vault-sync-deployment.yaml
  - ariadne-serviceaccount.yaml
@ -34,12 +34,9 @@ resources:
  - node-nofile-daemonset.yaml
  - metis-sentinel-amd64-daemonset.yaml
  - metis-sentinel-arm64-daemonset.yaml
  - metis-k3s-token-sync-cronjob.yaml
  - k3s-agent-restart-daemonset.yaml
  - pod-cleaner-cronjob.yaml
  - node-image-sweeper-serviceaccount.yaml
  - node-image-sweeper-daemonset.yaml
  - image-sweeper-cronjob.yaml
  - metis-service.yaml
  - soteria-networkpolicy.yaml
  - oauth2-proxy-soteria-networkpolicy.yaml
@ -51,12 +48,18 @@ resources:
  - metis-ingress.yaml
 images:
  - name: registry.bstein.dev/bstein/ariadne
-    newTag: 0.1.0-107 # {"$imagepolicy": "maintenance:ariadne:tag"}
+    newTag: 0.1.0-188 # {"$imagepolicy": "maintenance:ariadne:tag"}
  - name: registry.bstein.dev/bstein/metis
-    newTag: 0.1.0-9-amd64
+    newTag: 0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64:tag"}
  - name: registry.bstein.dev/bstein/soteria
    newTag: 0.1.0-36 # {"$imagepolicy": "maintenance:soteria:tag"}
 configMapGenerator:
  - name: metis-inventory
    namespace: maintenance
    files:
      - inventory.yaml=metis-inventory.yaml
    options:
      disableNameSuffixHash: true
  - name: disable-k3s-traefik-script
    namespace: maintenance
    files:
@ -75,12 +78,6 @@ configMapGenerator:
      - node_nofile.sh=scripts/node_nofile.sh
    options:
      disableNameSuffixHash: true
  - name: pod-cleaner-script
    namespace: maintenance
    files:
      - pod_cleaner.sh=scripts/pod_cleaner.sh
    options:
      disableNameSuffixHash: true
  - name: node-image-sweeper-script
    namespace: maintenance
    files:
--- a/services/maintenance/metis-certificate.yaml
+++ b/services/maintenance/metis-certificate.yaml
@ -2,12 +2,12 @@
 # TLS certificate for the metis web endpoint, renamed from sentinel.* to
 # recovery.*; secretName must stay in sync with the TLS secret referenced by
 # metis-ingress.yaml (recovery-tls).
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
-  name: sentinel-tls
+  name: recovery-tls
  namespace: maintenance
 spec:
-  secretName: sentinel-tls
+  secretName: recovery-tls
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt
  dnsNames:
-    - sentinel.bstein.dev
+    - recovery.bstein.dev
--- a/services/maintenance/metis-configmap.yaml
+++ b/services/maintenance/metis-configmap.yaml
@ -8,19 +8,21 @@ data:
  METIS_BIND_ADDR: :8080
  # NOTE(review): metis-deployment.yaml in this change mounts the
  # metis-inventory ConfigMap (key inventory.yaml) at /etc/metis, while this
  # path still points under /app — confirm which inventory file metis loads.
  METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
  METIS_DATA_DIR: /var/lib/metis
-  METIS_DEFAULT_FLASH_HOST: titan-22
+  METIS_DEFAULT_FLASH_HOST: titan-20
-  METIS_FLASH_HOSTS: titan-22,titan-24,titan-20,titan-21,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
+  METIS_FLASH_HOSTS: titan-20,titan-21,titan-22,titan-24,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
-  METIS_LOCAL_HOST: titan-22
+  METIS_LOCAL_HOST: titan-20
  METIS_ALLOWED_GROUPS: admin,maintenance
  # 10^12 bytes; quoted so the value stays a string in the ConfigMap.
  METIS_MAX_DEVICE_BYTES: "1000000000000"
  METIS_NAMESPACE: maintenance
  # 14400 seconds = 4 hours.
  # The trailing {"$imagepolicy": ...} comments on the runner image lines are
  # Flux image-automation setter markers and must stay on the same line as
  # the value they update.
-  METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-23-amd64
+  METIS_REMOTE_POD_TIMEOUT_SEC: "14400"
-  METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-23-arm64
+  METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-amd64"}
  METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64"}
  METIS_HARBOR_REGISTRY: registry.bstein.dev
  METIS_HARBOR_PROJECT: metis
  METIS_HARBOR_API_BASE: https://registry.bstein.dev/api/v2.0
  METIS_HARBOR_USERNAME: admin
-  METIS_HOST_TMP_DIR: /tmp/metis-flash-test
+  METIS_HOST_TMP_DIR: /var/tmp/metis-flash-test
  METIS_REMOTE_WORKSPACE_DIR: /var/tmp/metis-workspace
  METIS_SENTINEL_PUSH_URL: http://metis.maintenance.svc.cluster.local/internal/sentinel/snapshot
  # 1800 seconds = 30 minutes.
  METIS_SENTINEL_INTERVAL_SEC: "1800"
  METIS_SENTINEL_NSENTER: "1"
--- a/services/maintenance/metis-data-pvc.yaml
+++ b/services/maintenance/metis-data-pvc.yaml
@ -2,7 +2,7 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: metis-data
+  name: metis-data-longhorn
  namespace: maintenance
 spec:
  accessModes:
@ -10,4 +10,4 @@ spec:
  resources:
    requests:
      storage: 40Gi
-  storageClassName: local-path
+  storageClassName: longhorn
--- a/services/maintenance/metis-deployment.yaml
+++ b/services/maintenance/metis-deployment.yaml
@ -18,7 +18,7 @@ spec:
        prometheus.io/scrape: "true"
        prometheus.io/port: "8080"
        prometheus.io/path: "/metrics"
-        metis.bstein.dev/config-rev: "2026-04-06-02"
+        metis.bstein.dev/config-rev: "2026-04-24-01"
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "maintenance"
@ -27,9 +27,15 @@ spec:
          {{ with secret "kv/data/atlas/maintenance/metis-runtime" }}
          export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}"
          {{ end }}
        vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh: "kv/data/atlas/harbor/harbor-core"
        vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh: |
          {{ with secret "kv/data/atlas/harbor/harbor-core" }}
          export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}"
          {{ end }}
        vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh: "kv/data/atlas/maintenance/metis-ssh-keys"
        vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh: |
          {{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }}
          export METIS_SSH_KEY_BASTION="{{ or .Data.data.bastion_pub .Data.data.brad_pub "" }}"
          export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
          export METIS_SSH_KEY_ANANKE_TETHYS="{{ or .Data.data.ananke_tethys_pub .Data.data.hecate_tethys_pub "" }}"
          export METIS_SSH_KEY_ANANKE_DB="{{ or .Data.data.ananke_db_pub .Data.data.hecate_db_pub "" }}"
@ -37,10 +43,31 @@ spec:
    spec:
      serviceAccountName: metis
      terminationGracePeriodSeconds: 30
-      nodeSelector:
+      affinity:
-        kubernetes.io/hostname: titan-22
+        nodeAffinity:
-        kubernetes.io/arch: amd64
+          requiredDuringSchedulingIgnoredDuringExecution:
-        node-role.kubernetes.io/accelerator: "true"
+            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values:
                      - arm64
                  - key: longhorn-host
                    operator: In
                    values:
                      - "true"
                  - key: node-role.kubernetes.io/worker
                    operator: In
                    values:
                      - "true"
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi5
      containers:
        - name: metis
          image: registry.bstein.dev/bstein/metis:latest
@ -49,6 +76,7 @@ spec:
          args:
            - >-
              . /vault/secrets/metis-runtime-env.sh
              && . /vault/secrets/metis-harbor-env.sh
              && . /vault/secrets/metis-ssh-env.sh
              && exec metis serve
          envFrom:
@ -72,6 +100,9 @@ spec:
            periodSeconds: 5
            timeoutSeconds: 2
          volumeMounts:
            - name: metis-inventory
              mountPath: /etc/metis
              readOnly: true
            - name: metis-data
              mountPath: /var/lib/metis
            - name: host-dev
@ -93,9 +124,13 @@ spec:
            privileged: true
            runAsUser: 0
      volumes:
        - name: metis-inventory
          configMap:
            name: metis-inventory
            defaultMode: 0444
        - name: metis-data
          persistentVolumeClaim:
-            claimName: metis-data
+            claimName: metis-data-longhorn
        - name: host-dev
          hostPath:
            path: /dev
--- a/services/maintenance/metis-ingress.yaml
+++ b/services/maintenance/metis-ingress.yaml
@ -12,10 +12,10 @@ metadata:
 spec:
  ingressClassName: traefik
  tls:
-    - hosts: ["sentinel.bstein.dev"]
+    - hosts: ["recovery.bstein.dev"]
-      secretName: sentinel-tls
+      secretName: recovery-tls
  rules:
-    - host: sentinel.bstein.dev
+    - host: recovery.bstein.dev
      http:
        paths:
          - path: /
--- a/services/maintenance/metis-inventory.yaml
+++ b/services/maintenance/metis-inventory.yaml
@ -0,0 +1,150 @@
 # services/maintenance/metis-inventory.yaml
 # Hardware/OS classes. Each entry under nodes selects one of these by name
 # through its class field.
 # The ${...} placeholders are presumably expanded from the metis process
 # environment before the file is parsed — verify against the loader.
 classes:
  - name: rpi5-ubuntu-worker
    arch: arm64
    os: ubuntu-24.04
    image: ${METIS_IMAGE_RPI5_UBUNTU_WORKER}
    checksum: ${METIS_IMAGE_RPI5_UBUNTU_WORKER_SHA256}
    k3s_version: v1.33.3+k3s1
    default_labels:
      hardware: rpi5
      node-role.kubernetes.io/worker: "true"
  # NOTE(review): this class reuses the *_RPI4_ARMBIAN_LONGHORN image and
  # checksum variables and is otherwise identical to rpi4-armbian-longhorn
  # below — confirm the shared image is intentional.
  - name: rpi4-armbian-worker
    arch: arm64
    os: armbian-noble
    image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
    checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
    k3s_version: v1.31.5+k3s1
    default_labels:
      hardware: rpi4
      node-role.kubernetes.io/worker: "true"
  - name: rpi4-armbian-longhorn
    arch: arm64
    os: armbian-noble
    image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
    checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
    k3s_version: v1.31.5+k3s1
    default_labels:
      hardware: rpi4
      node-role.kubernetes.io/worker: "true"
 # Per-node records. Every node listed here joins as a k3s agent against the
 # same server URL and carries the same three authorized SSH keys.
 # METIS_K3S_TOKEN and the METIS_SSH_KEY_* names match the variables exported
 # by the Vault templates in metis-deployment.yaml; presumably they are
 # substituted into this file at load time — verify against the loader.
 nodes:
  - name: titan-10
    class: rpi5-ubuntu-worker
    hostname: titan-10
    ip: 192.168.22.36
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: ubuntu
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
  - name: titan-12
    class: rpi4-armbian-worker
    hostname: titan-12
    ip: 192.168.22.40
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
  - name: titan-16
    class: rpi4-armbian-worker
    hostname: titan-16
    ip: 192.168.22.44
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
    # Only node in this list with a usb_scratch section: an ext4 volume found
    # by label. Presumably the bind_targets are bind-mounted from the scratch
    # mountpoint — confirm against the metis provisioning code.
    usb_scratch:
      mountpoint: /mnt/scratch
      label: titan-16-scratch
      fs: ext4
      bind_targets:
        - /var/lib/rancher
        - /var/log
  # The rpi4-armbian-longhorn nodes below each declare two ext4 disks,
  # identified by filesystem UUID, at /mnt/astreae and /mnt/asteria.
  - name: titan-13
    class: rpi4-armbian-longhorn
    hostname: titan-13
    ip: 192.168.22.41
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
    longhorn_disks:
      - mountpoint: /mnt/astreae
        uuid: 6031fa8b-f28c-45c3-b7bc-6133300e07c6
        fs: ext4
      - mountpoint: /mnt/asteria
        uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
        fs: ext4
  - name: titan-15
    class: rpi4-armbian-longhorn
    hostname: titan-15
    ip: 192.168.22.43
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
    longhorn_disks:
      - mountpoint: /mnt/astreae
        uuid: f3362f14-5822-449f-944b-ac570b5cd615
        fs: ext4
      - mountpoint: /mnt/asteria
        uuid: 9c5316e6-f847-4884-b502-11f2d0d15d6f
        fs: ext4
  - name: titan-17
    class: rpi4-armbian-longhorn
    hostname: titan-17
    ip: 192.168.22.45
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
    longhorn_disks:
      - mountpoint: /mnt/astreae
        uuid: 1fecdade-08b0-49cb-9ae3-be6c188b0a96
        fs: ext4
      - mountpoint: /mnt/asteria
        uuid: 2fe9f613-d372-47ca-b84f-82084e4edda0
        fs: ext4
  - name: titan-19
    class: rpi4-armbian-longhorn
    hostname: titan-19
    ip: 192.168.22.47
    k3s_role: agent
    k3s_url: https://192.168.22.7:6443
    k3s_token: ${METIS_K3S_TOKEN}
    ssh_user: atlas
    ssh_authorized_keys:
      - ${METIS_SSH_KEY_BRAD}
      - ${METIS_SSH_KEY_ANANKE_TETHYS}
      - ${METIS_SSH_KEY_ANANKE_DB}
    longhorn_disks:
      - mountpoint: /mnt/astreae
        uuid: 4890abb9-dda2-4f4f-9c0f-081ee82849cf
        fs: ext4
      - mountpoint: /mnt/asteria
        uuid: 2b4ea28d-b0e6-4fa3-841b-cd7067ae9153
        fs: ext4
--- a/services/maintenance/metis-rbac.yaml
+++ b/services/maintenance/metis-rbac.yaml
@ -12,6 +12,7 @@ rules:
      - list
      - watch
      - delete
      - patch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
--- a/services/maintenance/metis-sentinel-amd64-daemonset.yaml
+++ b/services/maintenance/metis-sentinel-amd64-daemonset.yaml
@ -10,6 +10,8 @@ spec:
      app: metis-sentinel-amd64
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 25%
  template:
    metadata:
      labels:
@ -29,7 +31,7 @@ spec:
        kubernetes.io/arch: amd64
      containers:
        - name: metis-sentinel
-          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-amd64
+          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-sentinel-amd64"}
          imagePullPolicy: Always
          envFrom:
            - configMapRef:
--- a/services/maintenance/metis-sentinel-arm64-daemonset.yaml
+++ b/services/maintenance/metis-sentinel-arm64-daemonset.yaml
@ -10,6 +10,8 @@ spec:
      app: metis-sentinel-arm64
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 25%
  template:
    metadata:
      labels:
@ -29,7 +31,7 @@ spec:
        kubernetes.io/arch: arm64
      containers:
        - name: metis-sentinel
-          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-arm64
+          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-sentinel-arm64"}
          imagePullPolicy: Always
          envFrom:
            - configMapRef:
--- a/services/maintenance/oauth2-proxy-metis.yaml
+++ b/services/maintenance/oauth2-proxy-metis.yaml
@ -74,7 +74,7 @@ spec:
          args:
            - --provider=oidc
            - --config=/vault/secrets/oidc-config
-            - --redirect-url=https://sentinel.bstein.dev/oauth2/callback
+            - --redirect-url=https://recovery.bstein.dev/oauth2/callback
            - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
            - --scope=openid profile email groups
            - --email-domain=*
@ -96,7 +96,7 @@ spec:
            - --approval-prompt=auto
            - --skip-jwt-bearer-tokens=true
            - --oidc-groups-claim=groups
-            - --cookie-domain=sentinel.bstein.dev
+            - --cookie-domain=recovery.bstein.dev
          ports:
            - containerPort: 4180
              name: http
--- a/services/maintenance/soteria-deployment.yaml
+++ b/services/maintenance/soteria-deployment.yaml
@ -32,6 +32,21 @@ spec:
                    operator: NotIn
                    values: ["titan-10"]
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
--- a/services/monitoring/dashboards/atlas-jobs.json
+++ b/services/monitoring/dashboards/atlas-jobs.json
--- a/services/monitoring/dashboards/atlas-overview.json
+++ b/services/monitoring/dashboards/atlas-overview.json
@ -1290,11 +1290,6 @@
          "refId": "B",
          "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
          "legendFormat": "Statera"
        },
        {
          "refId": "C",
          "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
          "legendFormat": "combined"
        }
      ],
      "fieldConfig": {
@ -2196,57 +2191,57 @@
      "targets": [
        {
          "refId": "A",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ariadne"
        },
        {
          "refId": "B",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "metis"
        },
        {
          "refId": "C",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ananke"
        },
        {
          "refId": "D",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "atlasbot"
        },
        {
          "refId": "E",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "lesavka"
        },
        {
          "refId": "F",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "pegasus"
        },
        {
          "refId": "G",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "soteria"
        },
        {
          "refId": "H",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "titan-iac"
        },
        {
          "refId": "I",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "bstein-home"
        },
        {
          "refId": "J",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "arcanagon"
        },
        {
          "refId": "K",
-          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)",
+          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "data-prepper"
        }
      ],
--- a/services/monitoring/dashboards/atlas-power.json
+++ b/services/monitoring/dashboards/atlas-power.json
@ -123,11 +123,6 @@
          "refId": "B",
          "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
          "legendFormat": "Statera"
        },
        {
          "refId": "C",
          "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
          "legendFormat": "combined"
        }
      ],
      "fieldConfig": {
@ -145,7 +140,7 @@
          "mode": "multi"
        }
      },
-      "description": "Historical UPS power consumption in watts for titan-db, tethys, and combined load."
+      "description": "Historical UPS power consumption in watts for titan-db and tethys."
    },
    {
      "id": 3,
--- a/services/monitoring/dashboards/atlas-testing.json
+++ b/services/monitoring/dashboards/atlas-testing.json
--- a/services/monitoring/grafana-dashboard-jobs.yaml
+++ b/services/monitoring/grafana-dashboard-jobs.yaml
--- a/services/monitoring/grafana-dashboard-overview.yaml
+++ b/services/monitoring/grafana-dashboard-overview.yaml
@ -1299,11 +1299,6 @@ data:
              "refId": "B",
              "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
              "legendFormat": "Statera"
            },
            {
              "refId": "C",
              "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
              "legendFormat": "combined"
            }
          ],
          "fieldConfig": {
@ -2205,57 +2200,57 @@ data:
          "targets": [
            {
              "refId": "A",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ariadne\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "ariadne"
            },
            {
              "refId": "B",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"metis\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "metis"
            },
            {
              "refId": "C",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"ananke\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "ananke"
            },
            {
              "refId": "D",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "atlasbot"
            },
            {
              "refId": "E",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"lesavka\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "lesavka"
            },
            {
              "refId": "F",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"pegasus|pegasus-health|pegasus_health\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "pegasus"
            },
            {
              "refId": "G",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"soteria\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "soteria"
            },
            {
              "refId": "H",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"titan-iac|titan_iac\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "titan-iac"
            },
            {
              "refId": "I",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"bstein-home|bstein_home\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "bstein-home"
            },
            {
              "refId": "J",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "arcanagon"
            },
            {
              "refId": "K",
-              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)",
+              "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=~\"data-prepper|data_prepper\"}[1h]))) > 0) or on() vector(0)",
              "legendFormat": "data-prepper"
            }
          ],
--- a/services/monitoring/grafana-dashboard-power.yaml
+++ b/services/monitoring/grafana-dashboard-power.yaml
@ -132,11 +132,6 @@ data:
              "refId": "B",
              "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
              "legendFormat": "Statera"
            },
            {
              "refId": "C",
              "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
              "legendFormat": "combined"
            }
          ],
          "fieldConfig": {
@ -154,7 +149,7 @@ data:
              "mode": "multi"
            }
          },
-          "description": "Historical UPS power consumption in watts for titan-db, tethys, and combined load."
+          "description": "Historical UPS power consumption in watts for titan-db and tethys."
        },
        {
          "id": 3,
--- a/services/monitoring/grafana-dashboard-testing.yaml
+++ b/services/monitoring/grafana-dashboard-testing.yaml
--- a/services/monitoring/helmrelease.yaml
+++ b/services/monitoring/helmrelease.yaml
@ -312,7 +312,7 @@ spec:
    podAnnotations:
      vault.hashicorp.com/agent-inject: "true"
      vault.hashicorp.com/role: "monitoring"
-      monitoring.bstein.dev/restart-rev: "11"
+      monitoring.bstein.dev/restart-rev: "12"
      vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin"
      vault.hashicorp.com/agent-inject-template-grafana-env.sh: |
        {{ with secret "kv/data/atlas/monitoring/grafana-admin" }}
@ -440,6 +440,7 @@ spec:
            type: file
            disableDeletion: false
            editable: false
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/overview
          - name: overview-public
@ -448,6 +449,7 @@ spec:
            type: file
            disableDeletion: false
            editable: false
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/overview-public
          - name: pods
@ -456,6 +458,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/pods
          - name: nodes
@ -464,6 +467,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/nodes
          - name: storage
@ -472,6 +476,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/storage
          - name: gpu
@ -480,6 +485,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/gpu
          - name: network
@ -488,6 +494,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/network
          - name: mail
@ -496,6 +503,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/mail
          - name: jobs
@ -504,6 +512,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/jobs
          - name: testing
@ -512,6 +521,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/testing
          - name: power
@ -520,6 +530,7 @@ spec:
            type: file
            disableDeletion: false
            editable: true
            updateIntervalSeconds: 10
            options:
              path: /var/lib/grafana/dashboards/power
    dashboardsConfigMaps:
--- a/services/monitoring/oneoffs/grafana-user-dedupe-job.yaml
+++ b/services/monitoring/oneoffs/grafana-user-dedupe-job.yaml
@ -1,12 +1,12 @@
 # services/monitoring/oneoffs/grafana-user-dedupe-job.yaml
-# One-off job for monitoring/grafana-user-dedupe-api-v7.
+# One-off job for monitoring/grafana-user-dedupe-api-v8.
-# Purpose: grafana user dedupe api v7 (see container args/env in this file).
+# Purpose: grafana user dedupe api v8 (see container args/env in this file).
 # Run by setting spec.suspend to false, reconcile, then set it back to true.
 # Safe to delete the finished Job/pod; it should not run continuously.
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: grafana-user-dedupe-api-v7
+  name: grafana-user-dedupe-api-v8
  namespace: monitoring
 spec:
  suspend: true
@ -43,13 +43,13 @@ spec:
                    values: ["arm64"]
      containers:
        - name: dedupe
-          image: python:3.12-slim
+          image: registry.bstein.dev/bstein/python:3.12-slim
          command:
            - /bin/sh
            - -c
          args:
            - |
-              set -euo pipefail
+              set -eu
              for _ in $(seq 1 30); do
                if [ -f /vault/secrets/grafana-env.sh ]; then
                  break
--- a/services/monitoring/scripts/platform_quality_suite_probe.sh
+++ b/services/monitoring/scripts/platform_quality_suite_probe.sh
@ -35,7 +35,7 @@ push_suite_counters() {
    failed_count=$((failed_count + 1))
  fi
-  cat <<METRICS | curl -fsS --data-binary @- "${PUSHGATEWAY_URL}/metrics/job/platform-quality-suite-probe/suite/${suite}" >/dev/null
+  cat <<METRICS | curl -fsS -X PUT --data-binary @- "${PUSHGATEWAY_URL}/metrics/job/platform-quality-suite-probe/suite/${suite}" >/dev/null
 # TYPE platform_quality_gate_runs_total counter
 platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
 platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
@ -73,8 +73,8 @@ check_http_suite() {
 failures=0
 check_http_suite "atlasbot" "http://atlasbot.comms.svc.cluster.local:8090/health" "200" '"status": "ok"' || failures=$((failures + 1))
-check_http_suite "pegasus-health" "http://pegasus.jellyfin.svc.cluster.local/healthz" "200" || failures=$((failures + 1))
+check_http_suite "pegasus" "http://pegasus.jellyfin.svc.cluster.local/healthz" "200" || failures=$((failures + 1))
-check_http_suite "bstein-home" "http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local/api/healthz" "200" || failures=$((failures + 1))
+check_http_suite "bstein_home" "http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local/api/healthz" "200" || failures=$((failures + 1))
 if [ "${failures}" -gt 0 ]; then
  printf '[probe] completed with %s suite failure(s)\n' "${failures}" >&2
--- a/services/openldap/statefulset.yaml
+++ b/services/openldap/statefulset.yaml
@ -29,6 +29,36 @@ spec:
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
      serviceAccountName: sso-vault
      containers:
        - name: openldap
--- a/services/outline/deployment.yaml
+++ b/services/outline/deployment.yaml
@ -54,6 +54,34 @@ spec:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
--- a/services/outline/redis-deployment.yaml
+++ b/services/outline/redis-deployment.yaml
@ -20,6 +20,34 @@ spec:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
--- a/services/planka/deployment.yaml
+++ b/services/planka/deployment.yaml
@ -57,6 +57,34 @@ spec:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: atlas.bstein.dev/spillover
                    operator: DoesNotExist
            - weight: 95
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values:
                      - titan-13
                      - titan-15
                      - titan-17
                      - titan-19
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
--- a/services/quality/kustomization.yaml
+++ b/services/quality/kustomization.yaml
@ -0,0 +1,16 @@
 # services/quality/kustomization.yaml
 # Kustomize entry point for the quality stack: the namespace, SonarQube with its
 # storage and Service, the SonarQube metrics exporter, and the oauth2-proxy,
 # certificate and ingress that publish it at quality.bstein.dev.
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  - namespace.yaml
  - sonarqube-serviceaccount.yaml
  - quality-vault-serviceaccount.yaml
  - sonarqube-pvc.yaml
  - sonarqube-service.yaml
  - sonarqube-deployment.yaml
  - sonarqube-exporter-configmap.yaml
  - sonarqube-exporter-service.yaml
  - sonarqube-exporter-deployment.yaml
  - oauth2-proxy-sonarqube.yaml
  - sonarqube-certificate.yaml
  - sonarqube-ingress.yaml
--- a/services/quality/namespace.yaml
+++ b/services/quality/namespace.yaml
@ -0,0 +1,6 @@
 # services/quality/namespace.yaml
 # Namespace used by the SonarQube, exporter and oauth2-proxy manifests in this directory.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: quality
--- a/services/quality/oauth2-proxy-sonarqube.yaml
+++ b/services/quality/oauth2-proxy-sonarqube.yaml
@ -0,0 +1,118 @@
 # services/quality/oauth2-proxy-sonarqube.yaml
 # Service in front of the oauth2-proxy pods: port 80 forwards to the proxy's
 # listener on 4180. The SonarQube ingress routes to this Service.
 apiVersion: v1
 kind: Service
 metadata:
  name: oauth2-proxy-sonarqube
  namespace: quality
  labels:
    app: oauth2-proxy-sonarqube
 spec:
  ports:
    - name: http
      port: 80
      targetPort: 4180
  selector:
    app: oauth2-proxy-sonarqube
 ---
 # oauth2-proxy instance that authenticates users against the atlas realm at
 # sso.bstein.dev and proxies authorised requests to the in-cluster SonarQube Service.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: oauth2-proxy-sonarqube
  namespace: quality
  labels:
    app: oauth2-proxy-sonarqube
 spec:
  replicas: 2
  selector:
    matchLabels:
      app: oauth2-proxy-sonarqube
  template:
    metadata:
      labels:
        app: oauth2-proxy-sonarqube
      annotations:
        # The Vault agent renders client_id, client_secret and cookie_secret into
        # /vault/secrets/oidc-config, which the container loads through --config.
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "quality"
        vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/quality/sonarqube-oidc"
        vault.hashicorp.com/agent-inject-template-oidc-config: |
          {{- with secret "kv/data/atlas/quality/sonarqube-oidc" -}}
          client_id = "{{ .Data.data.client_id }}"
          client_secret = "{{ .Data.data.client_secret }}"
          cookie_secret = "{{ .Data.data.cookie_secret }}"
          {{- end -}}
    spec:
      serviceAccountName: quality-vault-sync
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          # Hard requirement: arm64 nodes labelled hardware=rpi5.
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
          # NOTE(review): this preference matches the same hardware=rpi5 label the
          # required term already enforces, so it cannot change node ranking.
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
      containers:
        - name: oauth2-proxy
          image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
          imagePullPolicy: IfNotPresent
          args:
            - --provider=oidc
            - --config=/vault/secrets/oidc-config
            - --redirect-url=https://quality.bstein.dev/oauth2/callback
            - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
            - --scope=openid profile email groups
            - --email-domain=*
            # Each group is listed with and without a leading slash; presumably the
            # IdP may emit either form in the groups claim -- TODO confirm.
            - --allowed-group=admin
            - --allowed-group=/admin
            - --allowed-group=dev
            - --allowed-group=/dev
            - --set-xauthrequest=true
            - --pass-access-token=true
            - --set-authorization-header=true
            - --cookie-secure=true
            - --cookie-samesite=lax
            - --cookie-refresh=20m
            - --cookie-expire=168h
            # NOTE(review): this accepts identities whose email the IdP has not
            # verified; confirm that group membership alone is the intended gate.
            - --insecure-oidc-allow-unverified-email=true
            - --upstream=http://sonarqube.quality.svc.cluster.local:9000
            - --http-address=0.0.0.0:4180
            - --skip-provider-button=true
            - --approval-prompt=auto
            - --skip-jwt-bearer-tokens=true
            - --oidc-groups-claim=groups
            - --cookie-domain=quality.bstein.dev
          ports:
            - containerPort: 4180
              name: http
          readinessProbe:
            httpGet:
              path: /ping
              port: 4180
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /ping
              port: 4180
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 25m
              memory: 64Mi
            limits:
              cpu: 250m
              memory: 256Mi
--- a/services/quality/quality-vault-serviceaccount.yaml
+++ b/services/quality/quality-vault-serviceaccount.yaml
@ -0,0 +1,7 @@
 # services/quality/quality-vault-serviceaccount.yaml
 # ServiceAccount that the oauth2-proxy-sonarqube and sonarqube-exporter pods run as
 # (both set serviceAccountName: quality-vault-sync and use Vault agent injection).
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: quality-vault-sync
  namespace: quality
--- a/services/quality/sonarqube-certificate.yaml
+++ b/services/quality/sonarqube-certificate.yaml
@ -0,0 +1,14 @@
 # services/quality/sonarqube-certificate.yaml
 # cert-manager Certificate for quality.bstein.dev, issued by the letsencrypt
 # ClusterIssuer and written to the quality-tls secret that the SonarQube ingress
 # references in its tls section.
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
  name: quality-tls
  namespace: quality
 spec:
  secretName: quality-tls
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt
  dnsNames:
    - quality.bstein.dev
--- a/services/quality/sonarqube-deployment.yaml
+++ b/services/quality/sonarqube-deployment.yaml
@ -0,0 +1,122 @@
 # services/quality/sonarqube-deployment.yaml
 # Single-replica SonarQube server. Database credentials come from Vault; data,
 # extensions, logs and temp all live on the sonarqube-data PersistentVolumeClaim.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: sonarqube
  namespace: quality
  labels:
    app: sonarqube
 spec:
  replicas: 1
  # The sonarqube-data claim is ReadWriteOnce and holds SonarQube's data directory.
  # The default RollingUpdate strategy starts the replacement pod before the old
  # one is stopped, so both would need the volume at the same time. Recreate
  # stops the old pod first so the volume is free for its successor.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: sonarqube
  template:
    metadata:
      labels:
        app: sonarqube
      annotations:
        # The Vault agent renders the JDBC username/password as shell exports into
        # /vault/secrets/sonarqube-db-env.sh, which the container sources on start.
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "quality"
        vault.hashicorp.com/agent-inject-secret-sonarqube-db-env.sh: "kv/data/atlas/quality/sonarqube-db"
        vault.hashicorp.com/agent-inject-template-sonarqube-db-env.sh: |
          {{- with secret "kv/data/atlas/quality/sonarqube-db" -}}
          export SONAR_JDBC_USERNAME="{{ .Data.data.username }}"
          export SONAR_JDBC_PASSWORD="{{ .Data.data.password }}"
          {{- end -}}
    spec:
      serviceAccountName: sonarqube
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
      initContainers:
        # Creates the four directories used as subPath mounts below and hands the
        # volume to UID/GID 1000 (presumably the user the sonarqube image runs as).
        - name: prepare-volume-permissions
          image: busybox:1.36
          command:
            - /bin/sh
            - -ec
            - |
              mkdir -p /opt/sonarqube/data /opt/sonarqube/extensions /opt/sonarqube/logs /opt/sonarqube/temp
              chown -R 1000:1000 /opt/sonarqube
          volumeMounts:
            - name: sonarqube-data
              mountPath: /opt/sonarqube
      containers:
        - name: sonarqube
          image: sonarqube:lts-community
          imagePullPolicy: IfNotPresent
          command:
            - /bin/bash
            - -ec
          args:
            - |
              set -euo pipefail
              . /vault/secrets/sonarqube-db-env.sh
              exec /opt/sonarqube/docker/entrypoint.sh
          env:
            - name: SONAR_JDBC_URL
              value: jdbc:postgresql://postgres-service.postgres.svc.cluster.local:5432/sonarqube
            - name: SONAR_ES_BOOTSTRAP_CHECKS_DISABLE
              value: "true"
            - name: SONAR_WEB_HOST
              value: "0.0.0.0"
          ports:
            - containerPort: 9000
              name: http
          readinessProbe:
            httpGet:
              path: /api/system/status
              port: 9000
            initialDelaySeconds: 60
            timeoutSeconds: 5
            periodSeconds: 10
            failureThreshold: 12
          livenessProbe:
            httpGet:
              path: /api/system/status
              port: 9000
            initialDelaySeconds: 120
            timeoutSeconds: 5
            periodSeconds: 20
            failureThreshold: 6
          resources:
            requests:
              cpu: 500m
              memory: 2Gi
            limits:
              cpu: "2"
              memory: 4Gi
          volumeMounts:
            # One claim, four sub-directories, so the image's own /opt/sonarqube
            # content outside these paths is not shadowed by the volume.
            - name: sonarqube-data
              mountPath: /opt/sonarqube/data
              subPath: data
            - name: sonarqube-data
              mountPath: /opt/sonarqube/extensions
              subPath: extensions
            - name: sonarqube-data
              mountPath: /opt/sonarqube/logs
              subPath: logs
            - name: sonarqube-data
              mountPath: /opt/sonarqube/temp
              subPath: temp
      volumes:
        - name: sonarqube-data
          persistentVolumeClaim:
            claimName: sonarqube-data
--- a/services/quality/sonarqube-exporter-configmap.yaml
+++ b/services/quality/sonarqube-exporter-configmap.yaml
@ -0,0 +1,192 @@
 # services/quality/sonarqube-exporter-configmap.yaml
 # Holds the Python source of the SonarQube Prometheus exporter. The
 # sonarqube-exporter Deployment mounts this ConfigMap at /config and runs exporter.py.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: sonarqube-exporter-script
  namespace: quality
 data:
  exporter.py: |
    #!/usr/bin/env python3
    import base64
    import json
    import os
    import threading
    import time
    import urllib.error
    import urllib.parse
    import urllib.request
    from http.server import BaseHTTPRequestHandler, HTTPServer
    # Runtime configuration, read once from the environment when the module loads.
    # Base URL of SonarQube; whitespace and trailing "/" are stripped so API paths
    # (which start with "/") can be appended directly.
    SONARQUBE_URL = os.getenv("SONARQUBE_URL", "http://sonarqube.quality.svc.cluster.local:9000").strip().rstrip("/")
    # Optional API token; when empty, requests carry no Authorization header.
    SONARQUBE_TOKEN = os.getenv("SONARQUBE_TOKEN", "").strip()
    # Timeout in seconds passed to every urlopen call.
    SONARQUBE_TIMEOUT_SECONDS = float(os.getenv("SONARQUBE_TIMEOUT_SECONDS", "10"))
    # TCP port the exporter's HTTP server listens on.
    SONARQUBE_EXPORTER_PORT = int(os.getenv("SONARQUBE_EXPORTER_PORT", "9798"))
    # Seconds a rendered /metrics body is reused; _get_metrics enforces a floor of 5.
    SONARQUBE_EXPORTER_CACHE_TTL_SECONDS = int(os.getenv("SONARQUBE_EXPORTER_CACHE_TTL_SECONDS", "45"))
    # Maximum number of projects whose quality gate is queried per refresh.
    SONARQUBE_PROJECT_LIMIT = int(os.getenv("SONARQUBE_PROJECT_LIMIT", "200"))
    # Cache of the most recently rendered metrics body; CACHE_LOCK guards both fields.
    CACHE_LOCK = threading.Lock()
    # time.time() value at which CACHE_BODY stops being served.
    CACHE_EXPIRES_AT = 0.0
    # Empty string means nothing has been rendered yet.
    CACHE_BODY = ""
    def _escape(value: str) -> str:
      return value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n")
    def _fetch_json(path: str):
      """GET SONARQUBE_URL + path and decode the JSON response.
      Returns (payload, "") on success. On failure returns (None, reason), where
      reason is "http_<status>" for HTTP errors and the exception class name otherwise.
      """
      headers = {}
      if SONARQUBE_TOKEN:
        # The token is sent as the Basic-auth user name with an empty password.
        basic = base64.b64encode(f"{SONARQUBE_TOKEN}:".encode("utf-8")).decode("utf-8")
        headers["Authorization"] = f"Basic {basic}"
      request = urllib.request.Request(f"{SONARQUBE_URL}{path}", headers=headers, method="GET")
      try:
        with urllib.request.urlopen(request, timeout=SONARQUBE_TIMEOUT_SECONDS) as response:
          raw = response.read()
          return json.loads(raw.decode("utf-8")), ""
      except urllib.error.HTTPError as err:
        return None, f"http_{err.code}"
      except Exception as err:  # noqa: BLE001
        return None, type(err).__name__
    def _metrics_body() -> str:
      """Query SonarQube and render a fresh Prometheus text exposition.
      Fetches the system status, the first page of projects and the quality gate of
      each inspected project (at most SONARQUBE_PROJECT_LIMIT), then returns the metric
      lines joined by newlines with a trailing newline.
      sonarqube_exporter_scrape_success is 0 when any of those API calls failed,
      including individual quality gate lookups, matching its "0=partial/error" HELP text.
      """
      lines = []
      now = time.time()
      scrape_success = 1
      lines.append("# HELP sonarqube_exporter_last_scrape_timestamp_seconds Unix timestamp when exporter last refreshed data.")
      lines.append("# TYPE sonarqube_exporter_last_scrape_timestamp_seconds gauge")
      lines.append(f"sonarqube_exporter_last_scrape_timestamp_seconds {now:.3f}")
      system_payload, system_error = _fetch_json("/api/system/status")
      system_status = "unknown"
      sonarqube_up = 0
      if isinstance(system_payload, dict):
        system_status = str(system_payload.get("status") or "unknown")
      elif system_error:
        # On failure the error reason doubles as the status label.
        system_status = system_error
        scrape_success = 0
      # Starting and migrating instances answer the API, so they are reported as up.
      if system_status.upper() in {
        "UP",
        "STARTING",
        "DB_MIGRATION_NEEDED",
        "DB_MIGRATION_RUNNING",
      }:
        sonarqube_up = 1
      lines.append("# HELP sonarqube_up SonarQube API reachability and health (1=reachable/healthy-ish, 0=down).")
      lines.append("# TYPE sonarqube_up gauge")
      lines.append(f"sonarqube_up {sonarqube_up}")
      lines.append("# HELP sonarqube_system_status Current SonarQube system status label.")
      lines.append("# TYPE sonarqube_system_status gauge")
      lines.append(f'sonarqube_system_status{{status="{_escape(system_status)}"}} 1')
      # Only the first page (up to 500 projects) is requested.
      projects_payload, projects_error = _fetch_json("/api/projects/search?ps=500&p=1")
      project_items = []
      projects_total = 0
      if isinstance(projects_payload, dict):
        paging = projects_payload.get("paging") or {}
        projects_total = int(paging.get("total") or 0)
        project_items = list(projects_payload.get("components") or [])
      else:
        scrape_success = 0
      lines.append("# HELP sonarqube_projects_total Total discovered SonarQube projects.")
      lines.append("# TYPE sonarqube_projects_total gauge")
      lines.append(f"sonarqube_projects_total {projects_total}")
      gate_counts = {}
      gate_fetch_errors = 0
      inspected = 0
      project_samples = []
      for project in project_items:
        if inspected >= SONARQUBE_PROJECT_LIMIT:
          break
        key = str(project.get("key") or "").strip()
        if not key:
          # Entries without a key are skipped and do not count against the limit.
          continue
        inspected += 1
        gate_payload, gate_error = _fetch_json(
          "/api/qualitygates/project_status?projectKey=" + urllib.parse.quote_plus(key)
        )
        if not isinstance(gate_payload, dict):
          gate_fetch_errors += 1
          continue
        project_status = gate_payload.get("projectStatus") or {}
        gate_status = str(project_status.get("status") or "UNKNOWN").upper()
        gate_counts[gate_status] = gate_counts.get(gate_status, 0) + 1
        is_ok = 1 if gate_status == "OK" else 0
        project_samples.append(
          f'sonarqube_project_quality_gate_pass{{project_key="{_escape(key)}",status="{_escape(gate_status)}"}} {is_ok}'
        )
      if gate_fetch_errors:
        # Projects whose gate lookup failed are missing from the samples below, so
        # the scrape is partial and must not be reported as a full success.
        scrape_success = 0
      lines.append("# HELP sonarqube_project_quality_gate_pass Project quality gate pass state (1=OK, 0=not OK).")
      lines.append("# TYPE sonarqube_project_quality_gate_pass gauge")
      lines.extend(project_samples)
      lines.append("# HELP sonarqube_quality_gate_projects_total Number of projects by quality gate status.")
      lines.append("# TYPE sonarqube_quality_gate_projects_total gauge")
      for status, count in sorted(gate_counts.items()):
        lines.append(f'sonarqube_quality_gate_projects_total{{status="{_escape(status)}"}} {count}')
      lines.append("# HELP sonarqube_quality_gate_fetch_errors_total Number of project gate API fetch failures in the last scrape.")
      lines.append("# TYPE sonarqube_quality_gate_fetch_errors_total gauge")
      lines.append(f"sonarqube_quality_gate_fetch_errors_total {gate_fetch_errors}")
      lines.append("# HELP sonarqube_exporter_scrape_success Exporter scrape success (1=success, 0=partial/error).")
      lines.append("# TYPE sonarqube_exporter_scrape_success gauge")
      lines.append(f"sonarqube_exporter_scrape_success {scrape_success}")
      if projects_error:
        # This series is only emitted when the project search itself failed.
        lines.append("# HELP sonarqube_exporter_projects_error Indicates projects API failure on the most recent scrape.")
        lines.append("# TYPE sonarqube_exporter_projects_error gauge")
        lines.append(f'sonarqube_exporter_projects_error{{error="{_escape(projects_error)}"}} 1')
      return "\n".join(lines) + "\n"
    def _get_metrics() -> str:
      """Return the cached metrics body, re-rendering it under CACHE_LOCK when stale.
      The body is stale when nothing has been rendered yet or the request time has
      reached CACHE_EXPIRES_AT; the new expiry is the request time plus the TTL (min 5s).
      """
      global CACHE_BODY, CACHE_EXPIRES_AT
      requested_at = time.time()
      ttl_seconds = max(5, SONARQUBE_EXPORTER_CACHE_TTL_SECONDS)
      with CACHE_LOCK:
        is_stale = (not CACHE_BODY) or requested_at >= CACHE_EXPIRES_AT
        if is_stale:
          CACHE_BODY = _metrics_body()
          CACHE_EXPIRES_AT = requested_at + ttl_seconds
        return CACHE_BODY
    class Handler(BaseHTTPRequestHandler):
      """Serves the health endpoints and /metrics; any other path gets a bare 404."""
      def _send_ok(self, content_type, payload):
        """Write a 200 response carrying payload with the given Content-Type."""
        self.send_response(200)
        self.send_header("Content-Type", content_type)
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)
      def do_GET(self):  # noqa: N802
        """Dispatch on the exact request path."""
        if self.path in ("/-/healthy", "/healthz"):
          self._send_ok("text/plain; charset=utf-8", b"ok\n")
        elif self.path == "/metrics":
          # The body is rendered (or read from cache) before any header is sent.
          self._send_ok("text/plain; version=0.0.4; charset=utf-8", _get_metrics().encode("utf-8"))
        else:
          self.send_response(404)
          self.end_headers()
      def log_message(self, fmt, *args):  # noqa: A003
        """Discard the default per-request access log line."""
        return
    def main():
      """Listen on all interfaces at SONARQUBE_EXPORTER_PORT and serve until the process exits.
      A threading server is used so that a slow /metrics refresh (one SonarQube call
      per project) cannot block the /-/healthy and /healthz endpoints; concurrent
      /metrics requests are still serialised by CACHE_LOCK in _get_metrics.
      """
      # Local import: the module-level import only brings in HTTPServer.
      from http.server import ThreadingHTTPServer
      server = ThreadingHTTPServer(("0.0.0.0", SONARQUBE_EXPORTER_PORT), Handler)
      server.serve_forever()
    if __name__ == "__main__":
      main()
--- a/services/quality/sonarqube-exporter-deployment.yaml
+++ b/services/quality/sonarqube-exporter-deployment.yaml
@ -0,0 +1,108 @@
 # services/quality/sonarqube-exporter-deployment.yaml
 # Runs the exporter.py script from the sonarqube-exporter-script ConfigMap and
 # exposes its Prometheus metrics on port 9798.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: sonarqube-exporter
  namespace: quality
  labels:
    app: sonarqube-exporter
 spec:
  replicas: 1
  selector:
    matchLabels:
      app: sonarqube-exporter
  template:
    metadata:
      labels:
        app: sonarqube-exporter
      annotations:
        # The Vault agent renders the sonarqube_exporter_token field as a
        # SONARQUBE_TOKEN export into /vault/secrets/sonarqube-exporter-env.sh.
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "quality"
        vault.hashicorp.com/agent-inject-secret-sonarqube-exporter-env.sh: "kv/data/atlas/quality/sonarqube-oidc"
        vault.hashicorp.com/agent-inject-template-sonarqube-exporter-env.sh: |
          {{- with secret "kv/data/atlas/quality/sonarqube-oidc" -}}
          export SONARQUBE_TOKEN="{{ .Data.data.sonarqube_exporter_token }}"
          {{- end -}}
        prometheus.io/scrape: "true"
        prometheus.io/port: "9798"
        prometheus.io/path: /metrics
    spec:
      serviceAccountName: quality-vault-sync
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
      containers:
        - name: exporter
          image: registry.bstein.dev/bstein/python:3.12-slim
          imagePullPolicy: IfNotPresent
          command:
            - /bin/sh
            - -ec
          args:
            # The token file is optional: when it is absent the exporter starts with
            # no SONARQUBE_TOKEN. The script is copied from the read-only ConfigMap
            # mount into the writable emptyDir at /app before it is run.
            - |
              if [ -f /vault/secrets/sonarqube-exporter-env.sh ]; then
                . /vault/secrets/sonarqube-exporter-env.sh
              fi
              cp /config/exporter.py /app/exporter.py
              python /app/exporter.py
          env:
            - name: SONARQUBE_URL
              value: http://sonarqube.quality.svc.cluster.local:9000
            - name: SONARQUBE_EXPORTER_PORT
              value: "9798"
            - name: SONARQUBE_EXPORTER_CACHE_TTL_SECONDS
              value: "45"
            # Overrides the script's built-in default of 200.
            - name: SONARQUBE_PROJECT_LIMIT
              value: "250"
          ports:
            - name: metrics
              containerPort: 9798
          # Both probes hit the exporter's own health endpoint, which does not call SonarQube.
          readinessProbe:
            httpGet:
              path: /-/healthy
              port: 9798
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9798
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 25m
              memory: 96Mi
            limits:
              cpu: 250m
              memory: 256Mi
          volumeMounts:
            - name: exporter-script
              mountPath: /config
              readOnly: true
            - name: app-tmp
              mountPath: /app
      volumes:
        - name: exporter-script
          configMap:
            name: sonarqube-exporter-script
            # 493 decimal is 0755 octal.
            defaultMode: 493
        - name: app-tmp
          emptyDir: {}
--- a/services/quality/sonarqube-exporter-service.yaml
+++ b/services/quality/sonarqube-exporter-service.yaml
@ -0,0 +1,19 @@
 # services/quality/sonarqube-exporter-service.yaml
 # Service "sonarqube-exporter" (namespace "quality"): publishes port 9798 of
 # the pods labelled app=sonarqube-exporter under the port name "metrics".
 apiVersion: v1
 kind: Service
 metadata:
  name: sonarqube-exporter
  namespace: quality
  labels:
    app: sonarqube-exporter
  annotations:
    # Scrape hints: port 9798, path /metrics.
    # NOTE(review): presumably read by an annotation-based Prometheus-style
    # scrape job; that job is not part of this manifest - confirm it exists.
    prometheus.io/scrape: "true"
    prometheus.io/port: "9798"
    prometheus.io/path: /metrics
 spec:
  selector:
    app: sonarqube-exporter
  ports:
    - name: metrics
      port: 9798
      # Named target: resolves to whichever container port is called
      # "metrics" on the selected pods.
      targetPort: metrics
--- a/services/quality/sonarqube-ingress.yaml
+++ b/services/quality/sonarqube-ingress.yaml
@ -0,0 +1,28 @@
 # services/quality/sonarqube-ingress.yaml
 # Ingress "sonarqube" (namespace "quality"): sends every path on
 # quality.bstein.dev to Service oauth2-proxy-sonarqube:80 and serves TLS from
 # the "quality-tls" secret.
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
  name: sonarqube
  namespace: quality
  annotations:
    # cert-manager annotation naming the "letsencrypt" ClusterIssuer.
    # NOTE(review): presumably this is what populates the quality-tls secret
    # referenced under spec.tls - confirm the issuer exists in the cluster.
    cert-manager.io/cluster-issuer: letsencrypt
    # Traefik router options: bind to the "websecure" entrypoint with TLS on.
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
 spec:
  ingressClassName: traefik
  rules:
    - host: quality.bstein.dev
      http:
        paths:
          # Prefix match on "/" covers every request path for this host.
          - path: /
            pathType: Prefix
            backend:
              service:
                # The backend is the oauth2-proxy-sonarqube Service, not the
                # "sonarqube" Service itself.
                # NOTE(review): presumably the proxy authenticates requests and
                # forwards them to SonarQube - verify against the proxy config.
                name: oauth2-proxy-sonarqube
                port:
                  number: 80
  tls:
    - hosts:
        - quality.bstein.dev
      secretName: quality-tls
--- a/services/quality/sonarqube-pvc.yaml
+++ b/services/quality/sonarqube-pvc.yaml
@ -0,0 +1,14 @@
 # services/quality/sonarqube-pvc.yaml
 # PersistentVolumeClaim "sonarqube-data" (namespace "quality"): requests 20Gi
 # from the "astreae" storage class.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  name: sonarqube-data
  namespace: quality
 spec:
  accessModes:
    # ReadWriteOnce: the volume can be mounted read-write by a single node.
    - ReadWriteOnce
  resources:
    requests:
      storage: 20Gi
  # NOTE(review): "astreae" is a cluster-specific StorageClass that is not
  # defined in this file - confirm it exists before applying.
  storageClassName: astreae
--- a/services/quality/sonarqube-service.yaml
+++ b/services/quality/sonarqube-service.yaml
@ -0,0 +1,15 @@
 # services/quality/sonarqube-service.yaml
 # Service "sonarqube" (namespace "quality"): exposes port 9000 of the pods
 # labelled app=sonarqube.
 apiVersion: v1
 kind: Service
 metadata:
  name: sonarqube
  namespace: quality
  labels:
    app: sonarqube
 spec:
  selector:
    app: sonarqube
  ports:
    - name: http
      # Name, namespace and port together give the in-cluster address
      # http://sonarqube.quality.svc.cluster.local:9000, which is the value
      # the exporter container is given as SONARQUBE_URL.
      port: 9000
      targetPort: 9000
--- a/services/quality/sonarqube-serviceaccount.yaml
+++ b/services/quality/sonarqube-serviceaccount.yaml
@ -0,0 +1,7 @@
 # services/quality/sonarqube-serviceaccount.yaml
 # ServiceAccount "sonarqube" in the "quality" namespace.
 # NOTE(review): the workloads that run under this account are not visible in
 # this file - check the SonarQube pod specs for serviceAccountName: sonarqube.
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: sonarqube
  namespace: quality
--- a/services/vault/scripts/vault_k8s_auth_configure.sh
+++ b/services/vault/scripts/vault_k8s_auth_configure.sh
@ -87,6 +87,7 @@ write_policy_and_role() {
  service_accounts="$3"
  read_paths="$4"
  write_paths="$5"
  extra_rules="${6:-}"
  policy_body=""
  for path in ${read_paths}; do
@ -109,6 +110,11 @@ path \"kv/metadata/atlas/${path}\" {
 }
 "
  done
  if [ -n "${extra_rules}" ]; then
    policy_body="${policy_body}
 ${extra_rules}
 "
  fi
  log "writing policy ${role}"
  printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -
@ -219,7 +225,7 @@ write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \
 write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \
  "comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
 write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \
-  "jenkins/* shared/harbor-pull" ""
+  "jenkins/* shared/harbor-pull quality/sonarqube-oidc" ""
 write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \
  "monitoring/* shared/postmark-relay shared/harbor-pull" ""
 write_policy_and_role "logging" "logging" "logging-vault-sync" \
@ -231,7 +237,15 @@ write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
 write_policy_and_role "health" "health" "health-vault-sync" \
  "health/*" ""
 write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
-  "maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" ""
+  "maintenance/ariadne-db maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull shared/soteria-restic harbor/harbor-core" "" \
  '
 path "kv/data/atlas/nodes/*" {
  capabilities = ["read"]
 }
 path "kv/metadata/atlas/nodes/*" {
  capabilities = ["list"]
 }
 '
 write_policy_and_role "maintenance-metis-token-sync" "maintenance" "metis-token-sync" \
  "" \
  "maintenance/metis-runtime"
@ -249,7 +263,15 @@ write_policy_and_role "vault" "vault" "vault" \
 write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
  "shared/keycloak-admin maintenance/metis-ssh-keys" \
-  "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys"
+  "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/soteria-oidc maintenance/metis-ssh-keys" \
  '
 path "kv/data/atlas/nodes/*" {
  capabilities = ["create", "update", "read"]
 }
 path "kv/metadata/atlas/nodes/*" {
  capabilities = ["list"]
 }
 '
 write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \
  "" \
  "crypto/wallet-monero-temp-rpc-auth"
--- a/testing/quality_contract.json
+++ b/testing/quality_contract.json
@ -1,5 +1,4 @@
 {
  "scope_note": "Quality-gate LOC/naming/coverage checks apply to managed automation and testing modules only, not broad Flux/Kubernetes manifest trees.",
  "required_docs": [
    {
      "path": "README.md",
@ -16,20 +15,32 @@
  ],
  "managed_modules": [
    "ci/scripts/publish_test_metrics.py",
    "ci/scripts/publish_test_metrics_quality.py",
    "ci/scripts/supply_chain_report.py",
    "services/mailu/scripts/mailu_sync.py",
    "services/mailu/scripts/mailu_sync_listener.py",
    "testing/__init__.py",
    "testing/quality_contract.py",
    "testing/quality_docs.py",
    "testing/quality_hygiene.py",
    "testing/quality_coverage.py",
-    "testing/quality_gate.py"
+    "testing/quality_gate.py",
    "ci/tests/glue/test_ariadne_schedules.py",
    "ci/tests/glue/test_glue_metrics.py",
      "testing/tests/test_publish_test_metrics.py",
      "testing/tests/test_supply_chain_report.py",
      "testing/tests/test_quality_contract.py",
    "testing/tests/test_quality_gate.py"
  ],
  "lint_paths": [
    "ci/scripts/publish_test_metrics.py",
    "ci/scripts/publish_test_metrics_quality.py",
    "ci/tests/glue",
    "scripts/tests",
    "services/comms/scripts/tests",
    "services/mailu/scripts/mailu_sync.py",
    "services/mailu/scripts/mailu_sync_listener.py",
    "testing/tests",
    "testing"
  ],
  "pytest_suites": {
@ -43,6 +54,7 @@
      "junit": "build/junit-unit.xml",
      "coverage_sources": [
        "ci/scripts",
        "scripts.dashboards_render_atlas",
        "services/mailu/scripts",
        "testing"
      ],
@ -70,6 +82,8 @@
      "hygiene",
      "unit",
      "coverage",
      "sonarqube",
      "ironbank",
      "glue"
    ]
  },
@ -107,7 +121,8 @@
      "ci/tests/**/*.py",
      "scripts/tests/**/*.py",
      "services/*/scripts/tests/**/*.py",
-      "services/mailu/scripts/mailu_sync.py"
+      "services/mailu/scripts/mailu_sync.py",
      "services/mailu/scripts/mailu_sync_listener.py"
    ],
    "naming_rules": [
      {
@ -151,6 +166,10 @@
    "minimum_percent": 95.0,
    "tracked_files": [
      "ci/scripts/publish_test_metrics.py",
      "ci/scripts/publish_test_metrics_quality.py",
      "ci/scripts/supply_chain_report.py",
      "services/mailu/scripts/mailu_sync.py",
      "services/mailu/scripts/mailu_sync_listener.py",
      "testing/quality_contract.py",
      "testing/quality_docs.py",
      "testing/quality_hygiene.py",
--- a/testing/quality_coverage.py
+++ b/testing/quality_coverage.py
@ -8,6 +8,7 @@ from typing import Any
 def _load_percentages(xml_path: Path, root: Path) -> dict[str, float]:
    """Load per-file line-rate percentages from a Cobertura XML report."""
    tree = ET.parse(xml_path)
    xml_root = tree.getroot()
    source_roots = [
@ -36,7 +37,11 @@ def _load_percentages(xml_path: Path, root: Path) -> dict[str, float]:
 def run_check(contract: dict[str, Any], root: Path, xml_path: Path) -> list[str]:
-    """Return human-readable issues for tracked files below the coverage floor."""
+    """Return human-readable issues for tracked files below the coverage floor.
    The report is intentionally per-file so a single weak module cannot hide
    behind aggregate suite coverage.
    """
    if not xml_path.exists():
        return [f"coverage xml missing: {xml_path.relative_to(root)}"]
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
jenkins	4beb08f1cf	scheduling: keep longhorn vault sync off storage nodes	2026-05-05 13:46:19 -03:00
jenkins	e2cbbd6963	scheduling: keep singleton apps off storage nodes	2026-05-05 13:37:04 -03:00
jenkins	c46764e80c	recovery(atlas): stop post-outage control-plane churn	2026-05-05 10:42:28 -03:00
jenkins	b81053aaec	ai(ollama): recover onto live jetson gpu pool	2026-05-05 06:42:15 -03:00
jenkins	9e659b790b	recovery(post-outage): restore jellyfin and maintenance sync	2026-05-05 06:31:09 -03:00
jenkins	c07220253e	maintenance(metis): run service on longhorn-ready workers	2026-05-05 06:19:15 -03:00
jenkins	39fb0e91e0	maintenance(metis): move runtime state to longhorn	2026-05-05 06:15:22 -03:00
jenkins	6243021ade	maintenance(metis): recover on arm64 builders	2026-05-05 06:12:06 -03:00
Brad Stein	4a6b54b4c3	logging: trim dated pod log rotations	2026-04-27 16:49:11 -03:00
Brad Stein	6c816e9fad	logging: trim constrained pod logs earlier	2026-04-27 16:42:02 -03:00
Brad Stein	2b5c7ca10b	logging: trim oversized rotated pod logs on constrained nodes	2026-04-27 16:31:57 -03:00
Brad Stein	45b145667a	longhorn: rerun settings ensure job	2026-04-27 16:16:51 -03:00
Brad Stein	9fb8dd4839	stability: harden fluent-bit buffering and longhorn node-down recovery	2026-04-27 16:15:13 -03:00
flux-bot	6352e0d976	chore(maintenance): automated image update	2026-04-26 00:59:25 +00:00
flux-bot	d4ff5d482e	chore(maintenance): automated image update	2026-04-26 00:59:05 +00:00
flux-bot	b303add71c	chore(maintenance): automated image update	2026-04-26 00:57:30 +00:00
flux-bot	a42e61de61	chore(maintenance): automated image update	2026-04-26 00:55:05 +00:00
Codex	6eb0158c6c	maintenance(metis): raise remote build timeout	2026-04-25 01:41:36 -03:00
Codex	0171ffad38	keycloak(metis): seed node intranet ips in vault	2026-04-24 22:18:58 -03:00
flux-bot	84934a6d1c	chore(maintenance): automated image update	2026-04-24 21:39:36 +00:00
flux-bot	98a2ade86d	chore(maintenance): automated image update	2026-04-24 21:39:18 +00:00
flux-bot	738a5184cb	chore(maintenance): automated image update	2026-04-24 21:37:35 +00:00
flux-bot	488c2694e3	chore(maintenance): automated image update	2026-04-24 21:36:19 +00:00
flux-bot	015d99dc5f	chore(maintenance): automated image update	2026-04-24 21:08:32 +00:00
flux-bot	b80745dc2d	chore(maintenance): automated image update	2026-04-24 21:08:15 +00:00
jenkins	0fa1b38f95	recovery(metis): trim node vault password placeholders	2026-04-24 18:07:35 -03:00
flux-bot	49e714c88c	chore(maintenance): automated image update	2026-04-24 21:07:32 +00:00
flux-bot	ff0b9762b1	chore(maintenance): automated image update	2026-04-24 21:05:15 +00:00
jenkins	ce36ff099b	recovery(metis): rerun node password seeding job	2026-04-24 17:33:40 -03:00
jenkins	6c4a7dea29	recovery(metis): use atlas kv node secrets	2026-04-24 17:29:58 -03:00
jenkins	04a80c1168	recovery(metis): seed per-node vault password slots	2026-04-24 17:24:37 -03:00
flux-bot	8179bd85db	chore(maintenance): automated image update	2026-04-24 20:19:26 +00:00
flux-bot	c08499b52d	chore(maintenance): automated image update	2026-04-24 20:19:10 +00:00
flux-bot	eca9e494ad	chore(maintenance): automated image update	2026-04-24 20:17:26 +00:00
flux-bot	ab0e68f9f3	chore(maintenance): automated image update	2026-04-24 20:15:10 +00:00
flux-bot	0566a47e35	chore(maintenance): automated image update	2026-04-24 17:50:13 +00:00
flux-bot	133597bfd0	chore(maintenance): automated image update	2026-04-24 17:49:55 +00:00
flux-bot	ccf318f977	chore(maintenance): automated image update	2026-04-24 17:48:12 +00:00
flux-bot	8affc052bf	chore(maintenance): automated image update	2026-04-24 17:46:54 +00:00
flux-bot	0cf5043977	chore(maintenance): automated image update	2026-04-24 17:20:52 +00:00
flux-bot	f2ffc6c1ef	chore(maintenance): automated image update	2026-04-24 17:19:09 +00:00
flux-bot	e7c770b10b	chore(maintenance): automated image update	2026-04-24 17:17:52 +00:00
jenkins	0ac3c97f90	maintenance(metis): restore full helper image refs	2026-04-24 13:51:12 -03:00
flux-bot	3e5e37d65a	chore(maintenance): automated image update	2026-04-24 16:11:02 +00:00
flux-bot	2acbcbff51	chore(maintenance): automated image update	2026-04-24 16:10:45 +00:00
flux-bot	70b382bc80	chore(maintenance): automated image update	2026-04-24 16:09:02 +00:00
flux-bot	d0191361d4	chore(maintenance): automated image update	2026-04-24 16:06:44 +00:00
flux-bot	59bb0bef78	chore(maintenance): automated image update	2026-04-24 15:56:37 +00:00
jenkins	4b456cf54a	maintenance(metis): track arch-specific images	2026-04-24 12:55:47 -03:00
jenkins	91c6023d25	maintenance(metis): move ingress to recovery host	2026-04-24 10:51:09 -03:00
jenkins	85d15cd3e1	maintenance(metis): raise remote pod timeout for recovery builds	2026-04-24 00:01:43 -03:00
jenkins	c0a4cbf03e	maintenance(metis): fix remote workspace permissions	2026-04-23 23:45:18 -03:00
jenkins	fad895efbb	maintenance(metis): move build scratch to usb storage	2026-04-23 23:37:00 -03:00
jenkins	47b31ebcf4	monitoring(testing): collapse heavy drilldowns	2026-04-22 16:56:52 -03:00
jenkins	88d2225774	test(titan-iac): cover dashboard generator contract	2026-04-22 15:31:36 -03:00
jenkins	a1f6758b95	monitoring(grafana): refresh provisioned dashboards	2026-04-22 15:13:26 -03:00
jenkins	23146aaa8a	monitoring(testing): clean canonical suite rows	2026-04-22 14:34:40 -03:00
jenkins	cc757ba082	ci(data-prepper): quote testcase metrics correctly	2026-04-22 13:28:35 -03:00
jenkins	c3c8b60671	ci(data-prepper): retrigger archive fix	2026-04-22 13:23:23 -03:00
jenkins	15792b1cf3	ci(data-prepper): archive junit without plugin dependency	2026-04-22 13:21:52 -03:00
jenkins	e75a5d5675	ci(data-prepper): keep validation labels portable	2026-04-22 13:13:56 -03:00
jenkins	4282810602	ci(data-prepper): retrigger quality publish	2026-04-22 13:07:37 -03:00
jenkins	8a58132dd4	ci(data-prepper): avoid xml parser in metrics publish	2026-04-22 13:04:47 -03:00
jenkins	be0d3e4300	ci(data-prepper): harden quality evidence helpers	2026-04-22 12:58:27 -03:00
jenkins	ba6848a67a	ci(data-prepper): publish real testcase metrics	2026-04-22 12:48:36 -03:00
jenkins	23beb08e5e	monitoring(testing): split quality trend panels	2026-04-22 12:42:33 -03:00
Brad Stein	5d560d962d	chore(metis): deploy scratch annotation sync	2026-04-22 04:28:08 -03:00
Brad Stein	51ade59a46	fix(metis): keep sentinel rollouts moving on degraded nodes	2026-04-22 03:40:28 -03:00
Brad Stein	7f91be27f9	chore(metis): deploy scratch sentinel fix	2026-04-22 03:33:54 -03:00
Brad Stein	63cd159151	test(titan-iac): cover mailu sync scripts	2026-04-22 02:53:00 -03:00
Brad Stein	443c70d01b	monitoring(testing): promote atlas testing layout	2026-04-22 02:26:31 -03:00
flux-bot	9f0ea1683a	chore(bstein-dev-home): automated image update	2026-04-22 05:01:25 +00:00
flux-bot	55df293e00	chore(bstein-dev-home): automated image update	2026-04-22 05:00:26 +00:00
Brad Stein	3168ffe027	ci(titan-iac): feed coverage into sonar gate	2026-04-22 01:57:19 -03:00
Brad Stein	abdefbbd05	ci(quality): enforce sonar and supply-chain gates	2026-04-22 01:29:54 -03:00
flux-bot	ead503d71e	chore(bstein-dev-home): automated image update	2026-04-22 04:15:46 +00:00
flux-bot	f54bdf8483	chore(bstein-dev-home): automated image update	2026-04-22 04:14:49 +00:00
flux-bot	80cb4c257f	chore(bstein-dev-home): automated image update	2026-04-22 04:06:45 +00:00
flux-bot	228e8a9772	chore(bstein-dev-home): automated image update	2026-04-22 04:05:50 +00:00
Brad Stein	15c798b915	gitops(bstein-home): deploy current image tags on main	2026-04-22 00:53:06 -03:00
Brad Stein	2ded2eb23d	ci(titan-iac): apply supply-chain waiver ledger	2026-04-22 00:42:03 -03:00
flux-bot	e6bb015ef2	chore(maintenance): automated image update	2026-04-22 03:26:48 +00:00
flux-bot	ead7c276b4	chore(maintenance): automated image update	2026-04-22 03:11:42 +00:00
Brad Stein	bfad9c19c5	deploy(bstein-home): target non-root frontend port	2026-04-22 00:01:50 -03:00
Brad Stein	439a44bc85	ci(data-prepper): scan staged supply-chain inputs	2026-04-21 23:29:53 -03:00
flux-bot	13f179d842	chore(maintenance): automated image update	2026-04-22 02:09:28 +00:00
Brad Stein	c0e5df30d5	ci(quality): use preloaded scanner image	2026-04-21 22:50:53 -03:00
flux-bot	79fbf2644b	chore(maintenance): automated image update	2026-04-22 01:50:20 +00:00
Brad Stein	0eca6adbbb	ci(quality): pass sonar token as login	2026-04-21 22:17:55 -03:00
Brad Stein	5801633b30	ci(quality): run sonar and supply-chain scans	2026-04-21 22:09:06 -03:00
Brad Stein	fac139fd0e	monitoring: rotate grafana dedupe job	2026-04-21 21:25:05 -03:00
jenkins	2df830f01b	longhorn: bound settings sync curl calls and rerun job	2026-04-21 21:18:41 -03:00
flux-bot	26fab34de5	chore(maintenance): automated image update	2026-04-22 00:16:57 +00:00
jenkins	e29d0fe349	longhorn: rebalance replicas and cap rebuild pressure	2026-04-21 21:12:19 -03:00
jenkins	77f7620eca	scheduling: de-prefer spillover nodes for non-longhorn services	2026-04-21 21:00:56 -03:00
Brad Stein	fb0dd60954	jenkins: allow slow controller startup	2026-04-21 20:54:42 -03:00
jenkins	4401c26496	jenkins: de-prefer spillover longhorn nodes for controller and agents	2026-04-21 20:48:02 -03:00
Brad Stein	9682a17a82	jenkins: avoid recursive volume ownership resets	2026-04-21 20:34:02 -03:00
Brad Stein	55d87c0c14	ci(quality): bind sonarqube token credential in pipelines	2026-04-21 20:16:59 -03:00
Brad Stein	379f20efc5	jenkins: prefer rpi5 without hard pin	2026-04-21 19:51:09 -03:00
Brad Stein	7883593166	ci(jenkins): inject sonarqube token from vault	2026-04-21 19:43:08 -03:00
flux-bot	5509dd86d5	chore(maintenance): automated image update	2026-04-21 22:01:24 +00:00
Brad Stein	06b27c9b9a	ci(titan-iac): lower agent cpu request	2026-04-21 18:32:45 -03:00
flux-bot	a927affb1f	chore(maintenance): automated image update	2026-04-21 21:22:18 +00:00
flux-bot	fab182e91e	chore(maintenance): automated image update	2026-04-21 20:59:18 +00:00
Brad Stein	d5be9e1ae9	ci(data-prepper): use mirrored base artifact	2026-04-21 16:56:25 -03:00
Brad Stein	fb48d473d2	ci(data-prepper): report n/a coverage as complete	2026-04-21 16:32:42 -03:00
Brad Stein	5e5cffbdc7	ci(data-prepper): allow arm64 worker scheduling	2026-04-21 15:33:42 -03:00
Brad Stein	e1d804dbb0	ci(data-prepper): lower kaniko cpu request	2026-04-21 15:26:13 -03:00
flux-bot	2086427b72	chore(maintenance): automated image update	2026-04-21 17:56:42 +00:00
Brad Stein	e811c0cabf	ci(jenkins): require rpi5 controller placement	2026-04-21 14:12:14 -03:00
flux-bot	b68c002e2d	chore(maintenance): automated image update	2026-04-21 17:05:21 +00:00
Brad Stein	cb7e0238dc	infra(ci): use harbor python utility images	2026-04-21 13:37:46 -03:00
flux-bot	043a2e75c8	chore(maintenance): automated image update	2026-04-21 16:30:12 +00:00
Brad Stein	6ac375f82e	ci(titan-iac): use harbor python runner	2026-04-21 13:18:31 -03:00
jenkins	8c1a26ead6	ci(titan-iac): use in-cluster victoria metrics dns	2026-04-21 12:30:06 -03:00
jenkins	d119f838e9	ci(titan-iac): harden quality metric publisher	2026-04-21 12:24:18 -03:00
jenkins	ae2356de6a	monitoring(testing): render missing metric zero states	2026-04-21 11:46:15 -03:00
jenkins	c1ac36df17	monitoring(testing): link test metrics to build artifacts	2026-04-21 11:39:13 -03:00
jenkins	cc79f3ebcd	ci(titan-iac): include primary branch in quality metrics	2026-04-21 11:08:59 -03:00
jenkins	1f991fc43d	harbor: expand registry storage	2026-04-21 10:56:27 -03:00
jenkins	b62980b76d	harbor: reduce vault injector bootstrap requests	2026-04-21 10:08:39 -03:00
jenkins	26da4945ea	harbor: move registry bootstrap to titan-11	2026-04-21 09:55:29 -03:00
jenkins	d599a162a9	monitoring(testing): add branch evidence panels	2026-04-21 09:35:43 -03:00
jenkins	e53adc17b3	ci(data-prepper): archive full quality evidence	2026-04-21 09:24:09 -03:00
jenkins	7cd40d457d	Merge remote-tracking branch 'origin/main'	2026-04-21 09:23:03 -03:00
flux-bot	d559d03bea	chore(maintenance): automated image update	2026-04-21 06:32:37 +00:00
flux-bot	691dc3c71b	chore(maintenance): automated image update	2026-04-21 06:27:29 +00:00
flux-bot	e81ecdd716	chore(maintenance): automated image update	2026-04-21 06:14:21 +00:00
flux-bot	74e385ad8b	chore(maintenance): automated image update	2026-04-21 06:10:27 +00:00
flux-bot	fecd095717	chore(maintenance): automated image update	2026-04-21 06:03:10 +00:00
flux-bot	caa02806c0	chore(maintenance): automated image update	2026-04-21 06:00:02 +00:00
flux-bot	c6c6f90d26	chore(maintenance): automated image update	2026-04-21 05:54:02 +00:00
flux-bot	e4efb89466	chore(maintenance): automated image update	2026-04-21 05:52:01 +00:00
flux-bot	8584885ddd	chore(maintenance): automated image update	2026-04-21 05:44:00 +00:00
flux-bot	6aeacaf872	chore(maintenance): automated image update	2026-04-21 05:42:00 +00:00
flux-bot	0146b92cc1	chore(maintenance): automated image update	2026-04-21 05:33:59 +00:00
flux-bot	981fca6cb4	chore(maintenance): automated image update	2026-04-21 05:26:59 +00:00
flux-bot	6dab28081d	chore(maintenance): automated image update	2026-04-21 05:12:56 +00:00
flux-bot	6ebc475da2	chore(maintenance): automated image update	2026-04-21 05:05:56 +00:00
flux-bot	fff26ebacb	chore(maintenance): automated image update	2026-04-21 04:57:54 +00:00
flux-bot	e3bebaa10b	chore(maintenance): automated image update	2026-04-21 04:55:55 +00:00
flux-bot	df16f03e46	chore(maintenance): automated image update	2026-04-21 04:46:53 +00:00
flux-bot	b5243e8566	chore(maintenance): automated image update	2026-04-21 04:36:52 +00:00
flux-bot	4501bbf8f0	chore(maintenance): automated image update	2026-04-21 04:34:52 +00:00
flux-bot	5331d7149a	chore(maintenance): automated image update	2026-04-21 04:24:51 +00:00
jenkins	c4b0389892	quality(titan-iac): widen enforced coverage contract	2026-04-20 21:39:53 -03:00
jenkins	387e104359	test(titan-iac): widen tracked quality coverage	2026-04-20 21:34:59 -03:00
jenkins	5ebc320843	ci(titan-iac): support direct script execution for metrics publish	2026-04-20 15:47:20 -03:00
jenkins	006f79658f	ci(titan-iac): retrigger after titan-09 cordon	2026-04-20 15:36:51 -03:00
jenkins	9451bb9c61	test(titan-iac): raise quality gate coverage for quality runner	2026-04-20 15:29:46 -03:00
jenkins	655c26c589	quality(titan-iac): split metrics publisher and harden gate lint	2026-04-20 15:21:49 -03:00
jenkins	607d8c21fa	monitoring(testing): fix missing-state queries and add test-case drilldowns	2026-04-20 13:45:01 -03:00
jenkins	b7f6cbd87c	ci(titan-iac): enforce 30d build and artifact retention	2026-04-20 12:30:57 -03:00
jenkins	a07b49a05f	monitoring(testing): fix atlas-jobs coverage and loc query expressions	2026-04-20 12:20:42 -03:00
jenkins	1d4227beec	ci(data-prepper): add retention and archive quality artifacts	2026-04-20 10:55:13 -03:00
jenkins	57306201cf	monitoring(testing): backfill placeholder test-case metrics across sparse suites	2026-04-20 09:13:34 -03:00
jenkins	7437ec5929	ci(titan-iac): emit placeholder test-case metric when junit has no cases	2026-04-20 09:10:04 -03:00
jenkins	710ec96990	test(titan-iac): update payload unit tests for per-test metric argument	2026-04-20 08:50:39 -03:00
jenkins	cb1c41c6ea	ci(titan-iac): infer coverage/loc metrics from quality summary artifacts	2026-04-20 08:43:21 -03:00
jenkins	e8823197f8	monitoring(testing): align test selector with exported job label	2026-04-20 08:38:38 -03:00
jenkins	c5b1302ff6	monitoring(testing): add fallbacks for problematic-test trend queries	2026-04-20 08:37:26 -03:00
jenkins	f02db9801c	monitoring(testing): add per-test metrics and flaky-test panels	2026-04-20 08:35:05 -03:00
jenkins	7d113291c9	monitoring(testing): split check trends into per-check success/failure panels	2026-04-20 08:07:30 -03:00
jenkins	47d5416dde	ci(titan-iac): harden promote git workspace detection	2026-04-20 00:59:24 -03:00
codex	f2c4204bab	monitoring(testing): fix suite all filter aliases and regex templating	2026-04-19 23:22:34 -03:00
codex	71cfdce862	jenkins: source streaming harbor creds from dedicated vault path	2026-04-19 23:02:30 -03:00
codex	d4112e5a74	ci(titan-iac): guard promote stage when workspace lacks .git	2026-04-19 22:58:58 -03:00
codex	6d2c72ff98	jenkins: keep streaming creds optional without vault hard dependency	2026-04-19 22:45:25 -03:00
codex	c8f7cd6ec2	jenkins(logging): split streaming harbor credentials	2026-04-19 22:40:56 -03:00
codex	bd85143aa0	jenkins: stop overriding push creds with harbor-pull secret	2026-04-19 22:36:18 -03:00
codex	cb992d1c53	maintenance(metis): raise remote timeout and improve progress	2026-04-19 22:34:16 -03:00
codex	7be6cfb9cb	ci(titan-iac): install git in runner before promote stage	2026-04-19 22:33:22 -03:00
codex	b848e6b6d8	monitoring(dashboards): regenerate atlas-testing from generator	2026-04-19 22:29:20 -03:00
flux-bot	849bba8f5d	chore(maintenance): automated image update	2026-04-20 01:19:35 +00:00
codex	86c492d8c1	ci: retrigger titan-iac after titan-18 cordon	2026-04-19 22:07:10 -03:00
codex	1ed8b7233d	maintenance(metis): roll duplicate-build fix to 0.1.0-24	2026-04-19 22:03:04 -03:00
codex	ddabda06bf	ci: fix data-prepper defaults and restore metrics publisher coverage	2026-04-19 21:57:40 -03:00
codex	881c724725	jenkins: revert sonar vault path injection blocking startup	2026-04-19 21:42:04 -03:00
codex	2db4952c39	jenkins(sonar): wire defaults and observe-mode toggles	2026-04-19 21:30:02 -03:00
codex	57432e01a3	maintenance(metis): export bastion ssh key for replacement readiness	2026-04-19 21:22:57 -03:00
codex	97bc0cea8c	maintenance(metis): use inventory path available in remote runner pods	2026-04-19 21:18:30 -03:00
codex	e930aac039	ci(gate): enforce sonar and supply-chain checks across suites	2026-04-19 21:16:42 -03:00
flux-bot	13ec9b2d7d	chore(maintenance): automated image update	2026-04-20 00:14:29 +00:00
Brad Stein	d8f07c2b70	maintenance(metis): run vault-enabled metis service image	2026-04-19 21:14:19 -03:00
Brad Stein	20a255252c	maintenance(metis): add titan-16 replacement profile	2026-04-19 21:01:49 -03:00
Brad Stein	376e68ec31	maintenance(metis): inject harbor creds into service runtime	2026-04-19 20:52:04 -03:00
flux-bot	7497f8d4e0	chore(maintenance): automated image update	2026-04-19 23:45:10 +00:00
Brad Stein	b3270e7231	maintenance(metis): add titan-10 and titan-12 inventory profiles	2026-04-19 20:44:12 -03:00
Brad Stein	1dce63fb9b	monitoring(testing): render zero-state data for missing/sonar panels	2026-04-19 16:56:22 -03:00
Brad Stein	96f3844677	quality(sonarqube): read exporter token from shared oidc vault path	2026-04-19 16:40:39 -03:00
Brad Stein	65edbd9ed9	quality(sonarqube): inject exporter token from vault	2026-04-19 16:34:27 -03:00
Brad Stein	29138b8a51	ci(metrics): publish canonical titan-iac gate checks	2026-04-19 16:29:07 -03:00
flux-bot	aede5aa899	chore(maintenance): automated image update	2026-04-19 19:19:49 +00:00
Brad Stein	12293c9d11	test(ci): align publish_test_metrics unit tests with current API	2026-04-19 16:18:35 -03:00
Brad Stein	2d0360be3b	ci(metrics): use Pushgateway PUT for suite payload replacement	2026-04-19 16:10:20 -03:00
Brad Stein	f9d7694f25	monitoring(testing): harden suite selector and success history query	2026-04-19 15:31:59 -03:00
Brad Stein	9e3cc0f760	ci(jenkins): fix glue test VM URL and default SA observer RBAC	2026-04-19 15:06:13 -03:00
Brad Stein	32410555cd	monitoring: remove combined UPS draw series from history panels	2026-04-19 14:51:25 -03:00
Brad Stein	347e7ccc84	monitoring: revert atlas overview dashboard to pre-quality changes	2026-04-19 14:43:41 -03:00
Brad Stein	e47a877169	ci: resolve flux branch without Groovy dollar interpolation	2026-04-19 14:41:22 -03:00
Brad Stein	592d037522	ci: fix titan-iac and data-prepper pipeline gate publishing	2026-04-19 14:33:26 -03:00
Brad Stein	3ccc2a1100	quality: standardize suite checks and add SonarQube stack	2026-04-19 14:18:58 -03:00
Brad Stein	9a20f4f854	monitoring(testing): redesign atlas testing dashboard and unify suite aliases	2026-04-18 17:47:06 -03:00
Brad Stein	9a8c454123	tests(quality-gate): cover metrics publisher edge paths	2026-04-18 17:29:50 -03:00
flux-bot	e1f430455d	chore(maintenance): automated image update	2026-04-18 19:36:24 +00:00
Brad Stein	01fe20fe68	monitoring(metrics): normalize platform gate contract and pegasus suite name	2026-04-18 16:34:20 -03:00