Compare commits

..

42 Commits

Author SHA1 Message Date
jenkins 9a86c350dd quality(titan-iac): split metrics publisher and harden gate lint 2026-04-20 15:20:56 -03:00
jenkins d342053196 ci(data-prepper): emit placeholder per-test metric series 2026-04-20 12:06:25 -03:00
jenkins c3cca8ad9a monitoring(testing): add per-test history panels and metric emitter 2026-04-20 12:03:27 -03:00
jenkins 9103cd22f2 ci(data-prepper): add retention and archive quality artifacts 2026-04-20 10:49:54 -03:00
094d202803 monitoring: remove combined UPS draw series from history panels 2026-04-19 14:50:24 -03:00
411bc6b90d monitoring: elevate Atlas Testing dashboard and no-data fallbacks 2026-04-18 14:50:59 -03:00
26b8f23426 quality-gate: emit workspace coverage and LOC gauges 2026-04-17 05:47:38 -03:00
629df65c7b monitoring(soteria): tune PVC backup age thresholds for nightly cadence 2026-04-14 02:14:43 -03:00
e5a824e4e1 typhon: register app and add v2-safe ble/control runtime toggles 2026-04-13 22:02:57 -03:00
6815a67c1f maintenance(soteria): roll out 0.1.0-35 2026-04-13 16:51:46 -03:00
deefdb53ad maintenance(soteria): roll out 0.1.0-34 2026-04-13 14:23:24 -03:00
4e4c310cd4 maintenance(soteria): roll out 0.1.0-33 2026-04-13 13:58:44 -03:00
df79cad1c3 maintenance(soteria): grant pod logs and roll out 0.1.0-32 2026-04-13 12:51:38 -03:00
b3d8b13f39 maintenance(soteria): roll pvc-node pin fix and pod-read rbac 2026-04-13 03:32:25 -03:00
a23b6a4b93 maintenance(soteria): move restic vault path to shared scope 2026-04-13 03:01:29 -03:00
38abbd9fe1 maintenance(vault): roll sync pod after soteria secret mapping 2026-04-13 02:55:42 -03:00
ac12a9bfed maintenance(soteria): source restic credentials from vault 2026-04-13 02:54:05 -03:00
8a371e1267 monitoring(alerts): make soteria backup health rule driver-agnostic 2026-04-13 02:38:53 -03:00
f25186ef7e maintenance(soteria): switch to encrypted restic backups 2026-04-13 02:14:39 -03:00
a01dc0813a maintenance(soteria): enable b2 usage scan config and alert 2026-04-12 19:47:58 -03:00
609cfcb696 monitoring: force horizontal stat layout for power/climate panels 2026-04-12 19:04:35 -03:00
75a992b829 maintenance(soteria): tighten oauth2 ingress and drill validation 2026-04-12 14:58:25 -03:00
a87a5f7bff monitoring: fix typhon low-threshold alert semantics 2026-04-12 14:56:34 -03:00
a1c8a99866 monitoring(alerts): watch soteria authz denial spikes 2026-04-12 12:19:42 -03:00
7b3dfa335b maintenance(soteria): harden ingress path and add backup alerts 2026-04-12 12:12:43 -03:00
e1bba18b52 maintenance: set explicit jenkins cleanup schedule 2026-04-12 11:36:50 -03:00
52882f1bb5 maintenance(soteria): add serviceaccount and rbac manifests 2026-04-12 11:36:33 -03:00
5128741c53 maintenance: default jenkins cleanup to dry-run 2026-04-12 11:28:48 -03:00
96f923ae4c maintenance(soteria): add protected UI, OIDC bootstrap, and backup health panel wiring 2026-04-12 11:16:29 -03:00
95bc3953d1 maintenance: wire jenkins cleanup permissions 2026-04-12 11:00:50 -03:00
f4e921bb33 scheduling: keep app workloads off control-plane 2026-04-12 04:26:52 -03:00
616c6308b1 maintenance: remove pi-usb-scratch guard rollout 2026-04-12 01:02:41 -03:00
d9b30d6c5b maintenance(pi-usb-scratch): skip k3s runtime rsync during cutover 2026-04-11 12:11:15 -03:00
7c337ad5a1 maintenance(pi-usb-scratch): disable rollout jitter for initial cutover 2026-04-11 12:00:30 -03:00
3823b68ee2 maintenance(pi-usb-scratch): fix false mount conflict detection 2026-04-11 11:57:50 -03:00
40de2b59a5 maintenance: enforce Astraios + tmpfs /tmp on worker Pis 2026-04-11 11:54:43 -03:00
5483c04bb3 maintenance: add worker pi usb scratch rollout 2026-04-11 01:03:42 -03:00
64b4f14018 ariadne: remove remaining cronjobs and migrate schedule ownership 2026-04-10 22:40:58 -03:00
166020ca1d ariadne: migrate glue cronjobs to schedules 2026-04-10 21:22:35 -03:00
60446ee830 testing(ci): centralize quality gate contract 2026-04-10 17:06:53 -03:00
c38b6c5e27 ci: publish titan-iac tests and seed ananke/lesavka jobs 2026-04-10 16:38:55 -03:00
9419c4b26b dashboards: unify suite pass-rate metrics on platform counters 2026-04-10 15:35:20 -03:00
160 changed files with 7270 additions and 24068 deletions

Jenkinsfile vendored

@@ -12,19 +12,8 @@ spec:
         kubernetes.io/arch: arm64
         node-role.kubernetes.io/worker: "true"
       containers:
-        - name: jnlp
-          image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
-          resources:
-            requests:
-              cpu: "25m"
-              memory: "256Mi"
         - name: python
-          image: registry.bstein.dev/bstein/python:3.12-slim
-          command:
-            - cat
-          tty: true
-        - name: quality-tools
-          image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
+          image: python:3.12-slim
           command:
             - cat
           tty: true
@@ -34,21 +23,8 @@ spec:
   environment {
     PIP_DISABLE_PIP_VERSION_CHECK = '1'
     PYTHONUNBUFFERED = '1'
-    SUITE_NAME = 'titan_iac'
+    SUITE_NAME = 'titan-iac'
     PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
-    SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
-    SONARQUBE_PROJECT_KEY = 'titan_iac'
-    SONARQUBE_TOKEN = credentials('sonarqube-token')
-    VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
-    QUALITY_GATE_SONARQUBE_ENFORCE = '1'
-    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
-    QUALITY_GATE_IRONBANK_ENFORCE = '1'
-    QUALITY_GATE_IRONBANK_REQUIRED = '0'
-    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
-  }
-  options {
-    disableConcurrentBuilds()
-    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
   }
   stages {
     stage('Checkout') {
@@ -58,175 +34,7 @@ spec:
     }
     stage('Install deps') {
       steps {
-        sh '''
-          set -eu
-          if ! command -v git >/dev/null 2>&1; then
-            apt-get update
-            apt-get install -y --no-install-recommends git ca-certificates
-            rm -rf /var/lib/apt/lists/*
-          fi
-          pip install --no-cache-dir -r ci/requirements.txt
-        '''
-      }
-    }
-    stage('Prepare local quality evidence') {
-      steps {
-        sh '''
-          set -eu
-          mkdir -p build
-          set +e
-          python3 -m testing.quality_gate --profile local --build-dir build
-          local_quality_rc=$?
-          set -e
-          printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
-        '''
-      }
-    }
-    stage('Collect SonarQube evidence') {
-      steps {
-        container('quality-tools') {
-          sh '''#!/usr/bin/env bash
-            set -euo pipefail
-            mkdir -p build
-            args=(
-              "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
-              "-Dsonar.login=${SONARQUBE_TOKEN}"
-              "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
-              "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
-              "-Dsonar.sources=."
-              "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
-              "-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
-            )
-            [ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
-            set +e
-            sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
-            rc=${PIPESTATUS[0]}
-            set -e
-            printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
-          '''
-        }
-        sh '''
-          set -eu
-          mkdir -p build
-          python3 - <<'PY'
-import base64
-import json
-import os
-import time
-import urllib.parse
-import urllib.request
-from pathlib import Path
-host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
-project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
-token = os.getenv('SONARQUBE_TOKEN', '').strip()
-report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
-payload = {
-    "status": "ERROR",
-    "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
-}
-if host and project_key:
-    task_file = Path('.scannerwork/report-task.txt')
-    task_id = ''
-    if task_file.exists():
-        for line in task_file.read_text(encoding='utf-8').splitlines():
-            key, _, value = line.partition('=')
-            if key == 'ceTaskId':
-                task_id = value.strip()
-                break
-    if task_id:
-        ce_query = urllib.parse.urlencode({"id": task_id})
-        deadline = time.monotonic() + 180
-        while time.monotonic() < deadline:
-            ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
-            if token:
-                encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
-                ce_request.add_header("Authorization", f"Basic {encoded}")
-            try:
-                with urllib.request.urlopen(ce_request, timeout=12) as response:
-                    ce_payload = json.loads(response.read().decode("utf-8"))
-            except Exception:
-                time.sleep(3)
-                continue
-            status = str(ce_payload.get("task", {}).get("status", "")).upper()
-            if status in {"SUCCESS", "FAILED", "CANCELED"}:
-                break
-            time.sleep(3)
-    query = urllib.parse.urlencode({"projectKey": project_key})
-    request = urllib.request.Request(
-        f"{host}/api/qualitygates/project_status?{query}",
-        method="GET",
-    )
-    if token:
-        encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
-        request.add_header("Authorization", f"Basic {encoded}")
-    try:
-        with urllib.request.urlopen(request, timeout=12) as response:
-            payload = json.loads(response.read().decode("utf-8"))
-    except Exception as exc:  # noqa: BLE001
-        payload = {"status": "ERROR", "error": str(exc)}
-with open(report_path, "w", encoding="utf-8") as handle:
-    json.dump(payload, handle, indent=2, sort_keys=True)
-    handle.write("\\n")
-PY
-        '''
-      }
-    }
-    stage('Collect IronBank evidence') {
-      steps {
-        container('quality-tools') {
-          sh '''#!/usr/bin/env bash
-            set -euo pipefail
-            mkdir -p build
-            set +e
-            trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
-            trivy_rc=$?
-            set -e
-            if [ ! -s build/trivy-fs.json ]; then
-              cat > build/ironbank-compliance.json <<EOF
-{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
-EOF
-              exit 0
-            fi
-          '''
-        }
-        sh '''
-          set -eu
-          mkdir -p build
-          if [ -s build/trivy-fs.json ]; then
-            python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
-            exit 0
-          fi
-          python3 - <<'PY'
-import json
-import os
-from pathlib import Path
-report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
-if report_path.exists():
-    raise SystemExit(0)
-status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
-compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
-payload = {
-    "status": status or "unknown",
-    "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
-}
-payload = {k: v for k, v in payload.items() if v is not None}
-if "status" not in payload:
-    payload["status"] = "unknown"
-payload["note"] = (
-    "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
-    "or write build/ironbank-compliance.json in image-building repos."
-)
-report_path.parent.mkdir(parents=True, exist_ok=True)
-report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
-PY
-        '''
+        sh 'pip install --no-cache-dir -r ci/requirements.txt'
       }
     }
     stage('Run quality gate') {
@@ -256,96 +64,8 @@ PY
     stage('Enforce quality gate') {
      steps {
        sh '''
-          set -euo pipefail
-          gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
-          fail=0
-          if [ "${gate_rc}" -ne 0 ]; then
-            echo "quality gate failed with rc=${gate_rc}" >&2
-            fail=1
-          fi
-          enabled() {
-            case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
-              1|true|yes|on) return 0 ;;
-              *) return 1 ;;
-            esac
-          }
-          if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
-            sonar_status="$(python3 - <<'PY'
-import json
-from pathlib import Path
-path = Path("build/sonarqube-quality-gate.json")
-if not path.exists():
-    print("missing")
-    raise SystemExit(0)
-try:
-    payload = json.loads(path.read_text(encoding="utf-8"))
-except Exception:  # noqa: BLE001
-    print("error")
-    raise SystemExit(0)
-status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
-print(status or "missing")
-PY
-)"
-            case "${sonar_status}" in
-              ok|pass|passed|success) ;;
-              *)
-                echo "sonarqube gate failed: ${sonar_status}" >&2
-                fail=1
-                ;;
-            esac
-          fi
-          ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
-          if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
-            ironbank_required=1
-          fi
-          if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
-            supply_status="$(python3 - <<'PY'
-import json
-from pathlib import Path
-path = Path("build/ironbank-compliance.json")
-if not path.exists():
-    print("missing")
-    raise SystemExit(0)
-try:
-    payload = json.loads(path.read_text(encoding="utf-8"))
-except Exception:  # noqa: BLE001
-    print("error")
-    raise SystemExit(0)
-compliant = payload.get("compliant")
-if compliant is True:
-    print("ok")
-elif compliant is False:
-    print("failed")
-else:
-    status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
-    print(status or "missing")
-PY
-)"
-            case "${supply_status}" in
-              ok|pass|passed|success|compliant) ;;
-              not_applicable|na|n/a)
-                if enabled "${ironbank_required}"; then
-                  echo "supply chain gate required but status=${supply_status}" >&2
-                  fail=1
-                fi
-                ;;
-              *)
-                if enabled "${ironbank_required}"; then
-                  echo "supply chain gate failed: ${supply_status}" >&2
-                  fail=1
-                else
-                  echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
-                fi
-                ;;
-            esac
-          fi
-          exit "${fail}"
+          set -eu
+          test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
        '''
      }
    }
@@ -354,7 +74,7 @@ PY
       script {
         env.FLUX_BRANCH = sh(
           returnStdout: true,
-          script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
+          script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
         ).trim()
         if (!env.FLUX_BRANCH) {
           error('Flux branch not found in gotk-sync.yaml')
@@ -373,20 +93,6 @@ PY
       steps {
         withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
           sh '''
-            set -euo pipefail
-            if ! command -v git >/dev/null 2>&1; then
-              if command -v apk >/dev/null 2>&1; then
-                apk add --no-cache git >/dev/null
-              elif command -v apt-get >/dev/null 2>&1; then
-                apt-get update >/dev/null
-                apt-get install -y git >/dev/null
-              fi
-            fi
-            cd "${WORKSPACE:-$PWD}"
-            if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
-              echo "workspace is not a git checkout; skipping promote"
-              exit 0
-            fi
             set +x
             git config user.email "jenkins@bstein.dev"
             git config user.name "jenkins"
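Both this Jenkinsfile and the near-identical one diffed below keep SUITE_NAME and PUSHGATEWAY_URL even after the trim, so the per-suite metrics push survives the simplification. As a rough sketch of that push, assuming platform-quality-gateway accepts the standard Pushgateway PUT /metrics/job/<job> route (the payload below is hypothetical, mirroring metric names emitted by the publisher script further down):

import urllib.request

# Hedged sketch: push one exposition line for a suite to the gateway.
PUSHGATEWAY_URL = "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
suite = "titan-iac"
body = (
    "# TYPE platform_quality_gate_runs_total counter\n"
    f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} 1\n'
)
request = urllib.request.Request(
    f"{PUSHGATEWAY_URL}/metrics/job/{suite}",
    data=body.encode("utf-8"),
    method="PUT",  # PUT replaces the job's previously pushed group
)
with urllib.request.urlopen(request, timeout=12) as response:
    response.read()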


@@ -11,19 +11,8 @@ spec:
         kubernetes.io/arch: arm64
         node-role.kubernetes.io/worker: "true"
       containers:
-        - name: jnlp
-          image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
-          resources:
-            requests:
-              cpu: "25m"
-              memory: "256Mi"
         - name: python
-          image: registry.bstein.dev/bstein/python:3.12-slim
-          command:
-            - cat
-          tty: true
-        - name: quality-tools
-          image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
+          image: python:3.12-slim
           command:
             - cat
           tty: true
@@ -33,21 +22,8 @@ spec:
   environment {
     PIP_DISABLE_PIP_VERSION_CHECK = '1'
     PYTHONUNBUFFERED = '1'
-    SUITE_NAME = 'titan_iac'
+    SUITE_NAME = 'titan-iac'
     PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
-    SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
-    SONARQUBE_PROJECT_KEY = 'titan_iac'
-    SONARQUBE_TOKEN = credentials('sonarqube-token')
-    VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
-    QUALITY_GATE_SONARQUBE_ENFORCE = '1'
-    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
-    QUALITY_GATE_IRONBANK_ENFORCE = '1'
-    QUALITY_GATE_IRONBANK_REQUIRED = '0'
-    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
-  }
-  options {
-    disableConcurrentBuilds()
-    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
   }
   stages {
     stage('Checkout') {
@@ -57,175 +33,7 @@ spec:
     }
     stage('Install deps') {
       steps {
-        sh '''
-          set -eu
-          if ! command -v git >/dev/null 2>&1; then
-            apt-get update
-            apt-get install -y --no-install-recommends git ca-certificates
-            rm -rf /var/lib/apt/lists/*
-          fi
-          pip install --no-cache-dir -r ci/requirements.txt
-        '''
-      }
-    }
-    stage('Prepare local quality evidence') {
-      steps {
-        sh '''
-          set -eu
-          mkdir -p build
-          set +e
-          python3 -m testing.quality_gate --profile local --build-dir build
-          local_quality_rc=$?
-          set -e
-          printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
-        '''
-      }
-    }
-    stage('Collect SonarQube evidence') {
-      steps {
-        container('quality-tools') {
-          sh '''#!/usr/bin/env bash
-            set -euo pipefail
-            mkdir -p build
-            args=(
-              "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
-              "-Dsonar.login=${SONARQUBE_TOKEN}"
-              "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
-              "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
-              "-Dsonar.sources=."
-              "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
-              "-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
-            )
-            [ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
-            set +e
-            sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
-            rc=${PIPESTATUS[0]}
-            set -e
-            printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
-          '''
-        }
-        sh '''
-          set -eu
-          mkdir -p build
-          python3 - <<'PY'
-import base64
-import json
-import os
-import time
-import urllib.parse
-import urllib.request
-from pathlib import Path
-host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
-project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
-token = os.getenv('SONARQUBE_TOKEN', '').strip()
-report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
-payload = {
-    "status": "ERROR",
-    "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
-}
-if host and project_key:
-    task_file = Path('.scannerwork/report-task.txt')
-    task_id = ''
-    if task_file.exists():
-        for line in task_file.read_text(encoding='utf-8').splitlines():
-            key, _, value = line.partition('=')
-            if key == 'ceTaskId':
-                task_id = value.strip()
-                break
-    if task_id:
-        ce_query = urllib.parse.urlencode({"id": task_id})
-        deadline = time.monotonic() + 180
-        while time.monotonic() < deadline:
-            ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
-            if token:
-                encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
-                ce_request.add_header("Authorization", f"Basic {encoded}")
-            try:
-                with urllib.request.urlopen(ce_request, timeout=12) as response:
-                    ce_payload = json.loads(response.read().decode("utf-8"))
-            except Exception:
-                time.sleep(3)
-                continue
-            status = str(ce_payload.get("task", {}).get("status", "")).upper()
-            if status in {"SUCCESS", "FAILED", "CANCELED"}:
-                break
-            time.sleep(3)
-    query = urllib.parse.urlencode({"projectKey": project_key})
-    request = urllib.request.Request(
-        f"{host}/api/qualitygates/project_status?{query}",
-        method="GET",
-    )
-    if token:
-        encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
-        request.add_header("Authorization", f"Basic {encoded}")
-    try:
-        with urllib.request.urlopen(request, timeout=12) as response:
-            payload = json.loads(response.read().decode("utf-8"))
-    except Exception as exc:  # noqa: BLE001
-        payload = {"status": "ERROR", "error": str(exc)}
-with open(report_path, "w", encoding="utf-8") as handle:
-    json.dump(payload, handle, indent=2, sort_keys=True)
-    handle.write("\\n")
-PY
-        '''
-      }
-    }
-    stage('Collect IronBank evidence') {
-      steps {
-        container('quality-tools') {
-          sh '''#!/usr/bin/env bash
-            set -euo pipefail
-            mkdir -p build
-            set +e
-            trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
-            trivy_rc=$?
-            set -e
-            if [ ! -s build/trivy-fs.json ]; then
-              cat > build/ironbank-compliance.json <<EOF
-{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
-EOF
-              exit 0
-            fi
-          '''
-        }
-        sh '''
-          set -eu
-          mkdir -p build
-          if [ -s build/trivy-fs.json ]; then
-            python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
-            exit 0
-          fi
-          python3 - <<'PY'
-import json
-import os
-from pathlib import Path
-report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
-if report_path.exists():
-    raise SystemExit(0)
-status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
-compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
-payload = {
-    "status": status or "unknown",
-    "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
-}
-payload = {k: v for k, v in payload.items() if v is not None}
-if "status" not in payload:
-    payload["status"] = "unknown"
-payload["note"] = (
-    "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
-    "or write build/ironbank-compliance.json in image-building repos."
-)
-report_path.parent.mkdir(parents=True, exist_ok=True)
-report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
-PY
-        '''
+        sh 'pip install --no-cache-dir -r ci/requirements.txt'
      }
    }
    stage('Run quality gate') {
@@ -255,96 +63,8 @@ PY
     stage('Enforce quality gate') {
       steps {
         sh '''
-          set -euo pipefail
-          gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
-          fail=0
-          if [ "${gate_rc}" -ne 0 ]; then
-            echo "quality gate failed with rc=${gate_rc}" >&2
-            fail=1
-          fi
-          enabled() {
-            case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
-              1|true|yes|on) return 0 ;;
-              *) return 1 ;;
-            esac
-          }
-          if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
-            sonar_status="$(python3 - <<'PY'
-import json
-from pathlib import Path
-path = Path("build/sonarqube-quality-gate.json")
-if not path.exists():
-    print("missing")
-    raise SystemExit(0)
-try:
-    payload = json.loads(path.read_text(encoding="utf-8"))
-except Exception:  # noqa: BLE001
-    print("error")
-    raise SystemExit(0)
-status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
-print(status or "missing")
-PY
-)"
-            case "${sonar_status}" in
-              ok|pass|passed|success) ;;
-              *)
-                echo "sonarqube gate failed: ${sonar_status}" >&2
-                fail=1
-                ;;
-            esac
-          fi
-          ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
-          if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
-            ironbank_required=1
-          fi
-          if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
-            supply_status="$(python3 - <<'PY'
-import json
-from pathlib import Path
-path = Path("build/ironbank-compliance.json")
-if not path.exists():
-    print("missing")
-    raise SystemExit(0)
-try:
-    payload = json.loads(path.read_text(encoding="utf-8"))
-except Exception:  # noqa: BLE001
-    print("error")
-    raise SystemExit(0)
-compliant = payload.get("compliant")
-if compliant is True:
-    print("ok")
-elif compliant is False:
-    print("failed")
-else:
-    status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
-    print(status or "missing")
-PY
-)"
-            case "${supply_status}" in
-              ok|pass|passed|success|compliant) ;;
-              not_applicable|na|n/a)
-                if enabled "${ironbank_required}"; then
-                  echo "supply chain gate required but status=${supply_status}" >&2
-                  fail=1
-                fi
-                ;;
-              *)
-                if enabled "${ironbank_required}"; then
-                  echo "supply chain gate failed: ${supply_status}" >&2
-                  fail=1
-                else
-                  echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
-                fi
-                ;;
-            esac
-          fi
-          exit "${fail}"
+          set -eu
+          test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
         '''
       }
     }
@@ -372,20 +92,6 @@ PY
       steps {
        withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
          sh '''
-            set -euo pipefail
-            if ! command -v git >/dev/null 2>&1; then
-              if command -v apk >/dev/null 2>&1; then
-                apk add --no-cache git >/dev/null
-              elif command -v apt-get >/dev/null 2>&1; then
-                apt-get update >/dev/null
-                apt-get install -y git >/dev/null
-              fi
-            fi
-            cd "${WORKSPACE:-$PWD}"
-            if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
-              echo "workspace is not a git checkout; skipping promote"
-              exit 0
-            fi
            set +x
            git config user.email "jenkins@bstein.dev"
            git config user.name "jenkins"


@@ -6,14 +6,10 @@ from __future__ import annotations
 import json
 import os
 from glob import glob
-from pathlib import Path
-import sys
 import urllib.error
 import urllib.request
 import xml.etree.ElementTree as ET
-sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
 from ci.scripts import publish_test_metrics_quality as _quality_helpers
 CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
@@ -187,7 +183,6 @@ def _build_payload(
     failed_count: int,
     branch: str,
     build_number: str,
-    jenkins_job: str,
     summary: dict | None = None,
     workspace_line_coverage_percent: float = 0.0,
     source_lines_over_500: int = 0,
@@ -200,15 +195,8 @@
             "suite": suite,
             "branch": branch or "unknown",
             "build_number": build_number or "unknown",
-            "jenkins_job": jenkins_job or suite,
         }
     )
-    test_case_base_labels = {
-        "suite": suite,
-        "branch": branch or "unknown",
-        "build_number": build_number or "unknown",
-        "jenkins_job": jenkins_job or suite,
-    }
     lines = [
         "# TYPE platform_quality_gate_runs_total counter",
         f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
@@ -221,8 +209,6 @@
         "# TYPE titan_iac_quality_gate_run_status gauge",
         f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
         f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
-        "# TYPE platform_quality_gate_build_info gauge",
-        f"platform_quality_gate_build_info{build_labels} 1",
         "# TYPE titan_iac_quality_gate_build_info gauge",
         f"titan_iac_quality_gate_build_info{build_labels} 1",
         "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
@@ -240,18 +226,12 @@
     lines.append("# TYPE platform_quality_gate_test_case_result gauge")
     if test_cases:
         for test_name, test_status in test_cases:
-            labels = {
-                **test_case_base_labels,
-                "test": test_name,
-                "status": test_status,
-            }
             lines.append(
-                f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
+                f'platform_quality_gate_test_case_result{{suite="{suite}",test="{_escape_label(test_name)}",status="{_escape_label(test_status)}"}} 1'
             )
     else:
-        labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
         lines.append(
-            f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
+            f'platform_quality_gate_test_case_result{{suite="{suite}",test="__no_test_cases__",status="skipped"}} 1'
         )
     return "\n".join(lines) + "\n"
@@ -264,11 +244,8 @@ def main() -> int:
     junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
     exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
     summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
-    branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
-    if branch.startswith("origin/"):
-        branch = branch[len("origin/") :]
+    branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", ""))
     build_number = os.getenv("BUILD_NUMBER", "")
-    jenkins_job = os.getenv("JOB_NAME", "titan-iac")
     tests = _collect_junit_totals(junit_glob)
     test_cases = _collect_junit_cases(junit_glob)
@@ -322,7 +299,6 @@ def main() -> int:
         failed_count=failed_count,
         branch=branch,
         build_number=build_number,
-        jenkins_job=jenkins_job,
         summary=summary,
         workspace_line_coverage_percent=workspace_line_coverage_percent,
         source_lines_over_500=source_lines_over_500,
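The replacement exposition lines above lean on _escape_label, which this diff does not show (_label_str, the dict-based formatter on the removed side, presumably lived next to it in publish_test_metrics_quality). A minimal sketch of what _escape_label likely does, assuming it follows the Prometheus text-format escaping rules for label values:

def _escape_label(value: str) -> str:
    # Escape the backslash first so the later escapes are not double-escaped.
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )

# _escape_label('it "works"') == 'it \\"works\\"'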


@@ -1,173 +0,0 @@
-"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
-from __future__ import annotations
-import argparse
-import datetime as dt
-import json
-from pathlib import Path
-from typing import Any
-FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
-def _read_json(path: Path) -> dict[str, Any]:
-    """Read a JSON object from disk for use as pipeline evidence."""
-    payload = json.loads(path.read_text(encoding="utf-8"))
-    if not isinstance(payload, dict):
-        raise ValueError(f"{path} must contain a JSON object")
-    return payload
-def _parse_day(raw: str | None) -> dt.date | None:
-    """Parse an ISO day while letting optional waiver dates stay optional."""
-    if not raw:
-        return None
-    return dt.date.fromisoformat(raw)
-def _today(override: str | None = None) -> dt.date:
-    """Return the policy day so tests can pin expiry behavior."""
-    return _parse_day(override) or dt.date.today()
-def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
-    """Return active ``(misconfiguration id, target)`` waivers and expired count."""
-    if path is None or not path.exists():
-        return set(), 0
-    payload = _read_json(path)
-    default_expires_at = payload.get("default_expires_at")
-    active: set[tuple[str, str]] = set()
-    expired = 0
-    for entry in payload.get("misconfigurations", []):
-        if not isinstance(entry, dict):
-            continue
-        misconfiguration_id = str(entry.get("id") or "").strip()
-        if not misconfiguration_id:
-            continue
-        expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
-        targets = entry.get("targets", [])
-        if not isinstance(targets, list):
-            continue
-        if expires_at and expires_at < policy_day:
-            expired += len(targets)
-            continue
-        # Waivers are target-specific so a new unsafe manifest fails until it is
-        # either fixed or deliberately accepted with a fresh expiration.
-        for target in targets:
-            if isinstance(target, str) and target:
-                active.add((misconfiguration_id, target))
-    return active, expired
-def _iter_failed_misconfigurations(payload: dict[str, Any]):
-    """Yield failed high/critical Trivy misconfiguration records."""
-    for result in payload.get("Results", []):
-        if not isinstance(result, dict):
-            continue
-        target = str(result.get("Target") or "")
-        for item in result.get("Misconfigurations") or []:
-            if not isinstance(item, dict):
-                continue
-            if item.get("Status") != "FAIL":
-                continue
-            if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
-                continue
-            yield target, item
-def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
-    """Count Trivy vulnerabilities at a specific severity."""
-    count = 0
-    for result in payload.get("Results", []):
-        if not isinstance(result, dict):
-            continue
-        for item in result.get("Vulnerabilities") or []:
-            if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
-                count += 1
-    return count
-def _count_secrets(payload: dict[str, Any]) -> int:
-    """Count detected secrets in the Trivy filesystem report."""
-    count = 0
-    for result in payload.get("Results", []):
-        if isinstance(result, dict):
-            count += len(result.get("Secrets") or [])
-    return count
-def build_report(
-    trivy_payload: dict[str, Any],
-    waiver_path: Path | None = None,
-    today_override: str | None = None,
-) -> dict[str, Any]:
-    """Build the compliance summary consumed by the quality gate."""
-    policy_day = _today(today_override)
-    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, policy_day)
-    open_misconfigs: list[dict[str, str]] = []
-    waived_misconfigs = 0
-    for target, item in _iter_failed_misconfigurations(trivy_payload):
-        misconfiguration_id = str(item.get("ID") or "")
-        if (misconfiguration_id, target) in active_waivers:
-            waived_misconfigs += 1
-            continue
-        open_misconfigs.append(
-            {
-                "id": misconfiguration_id,
-                "target": target,
-                "severity": str(item.get("Severity") or ""),
-                "title": str(item.get("Title") or ""),
-            }
-        )
-    critical = _count_vulnerabilities(trivy_payload, "CRITICAL")
-    high = _count_vulnerabilities(trivy_payload, "HIGH")
-    secrets = _count_secrets(trivy_payload)
-    status = "ok" if critical == 0 and secrets == 0 and not open_misconfigs else "failed"
-    return {
-        "status": status,
-        "compliant": status == "ok",
-        "category": "artifact_security",
-        "scan_type": "filesystem",
-        "scanner": "trivy",
-        "critical_vulnerabilities": critical,
-        "high_vulnerabilities": high,
-        "high_vulnerability_policy": "observe",
-        "secrets": secrets,
-        "high_or_critical_misconfigurations": len(open_misconfigs),
-        "waived_misconfigurations": waived_misconfigs,
-        "expired_waivers": expired_waivers,
-        "waiver_file": str(waiver_path) if waiver_path else "",
-        "open_misconfiguration_examples": open_misconfigs[:20],
-    }
-def main(argv: list[str] | None = None) -> int:
-    """CLI entrypoint used by Jenkins after the Trivy scan completes."""
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--trivy-json", required=True)
-    parser.add_argument("--waivers")
-    parser.add_argument("--output", required=True)
-    parser.add_argument("--today")
-    args = parser.parse_args(argv)
-    trivy_payload = _read_json(Path(args.trivy_json))
-    waiver_path = Path(args.waivers) if args.waivers else None
-    report = build_report(trivy_payload, waiver_path=waiver_path, today_override=args.today)
-    output_path = Path(args.output)
-    output_path.parent.mkdir(parents=True, exist_ok=True)
-    output_path.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8")
-    return 0
-if __name__ == "__main__":  # pragma: no cover
-    raise SystemExit(main())
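The deleted module is self-contained enough to exercise directly. A toy call (the payload is made up; the import assumes ci/ is importable as a package, matching the `from ci.scripts import ...` pattern in the publisher above) shows the gate tripping on an unwaived HIGH misconfiguration while HIGH vulnerabilities stay observe-only:

from ci.scripts.supply_chain_report import build_report

# Hypothetical minimal Trivy filesystem payload: one failed HIGH
# misconfiguration and one HIGH vulnerability, no secrets.
trivy_payload = {
    "Results": [
        {
            "Target": "services/demo/deployment.yaml",
            "Misconfigurations": [
                {"ID": "KSV-0014", "Status": "FAIL", "Severity": "HIGH", "Title": "Root filesystem should be read-only"},
            ],
            "Vulnerabilities": [
                {"VulnerabilityID": "CVE-0000-00000", "Severity": "HIGH"},
            ],
        }
    ]
}

report = build_report(trivy_payload, waiver_path=None)
assert report["status"] == "failed"  # the open misconfiguration trips the gate
assert report["high_vulnerabilities"] == 1  # counted, but observe-only per the policy field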


@@ -1,18 +1,52 @@
-max_success_age_hours: 48
-allow_suspended:
-  - bstein-dev-home/vaultwarden-cred-sync
-  - comms/guest-name-randomizer
-  - comms/othrys-room-reset
-  - comms/pin-othrys-invite
-  - comms/seed-othrys-room
-  - finance/firefly-user-sync
-  - health/wger-admin-ensure
-  - health/wger-user-sync
-  - mailu-mailserver/mailu-sync-nightly
-  - nextcloud/nextcloud-mail-sync
-  - vault/vault-oidc-config
 ariadne_schedule_tasks:
-  - schedule.mailu_sync
-  - schedule.nextcloud_sync
-  - schedule.vaultwarden_sync
-  - schedule.wger_admin
+  - task: schedule.mailu_sync
+    check_last_success: false
+  - task: schedule.nextcloud_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.nextcloud_cron
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.nextcloud_maintenance
+    check_last_success: false
+  - task: schedule.vaultwarden_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.wger_user_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.wger_admin
+    check_last_success: false
+  - task: schedule.firefly_user_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.firefly_cron
+    check_last_success: false
+  - task: schedule.vault_k8s_auth
+    check_last_success: false
+  - task: schedule.vault_oidc
+    check_last_success: false
+  - task: schedule.comms_guest_name
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.comms_pin_invite
+    check_last_success: false
+  - task: schedule.comms_reset_room
+    check_last_success: false
+  - task: schedule.comms_seed_room
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.pod_cleaner
+    check_last_success: true
+    max_success_age_hours: 6
+  - task: schedule.opensearch_prune
+    check_last_success: false
+  - task: schedule.image_sweeper
+    check_last_success: true
+    max_success_age_hours: 18
+  - task: schedule.metis_k3s_token_sync
+    check_last_success: true
+    max_success_age_hours: 12
+  - task: schedule.platform_quality_suite_probe
+    check_last_success: true
+    max_success_age_hours: 2
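These structured entries carry exactly the keys the _normalize_task helper (removed in the test diffs below) used to synthesize from bare strings, which is why that helper becomes dead code. A small before/after sketch of the mapping; note the defaults shift too, e.g. schedule.mailu_sync was implicitly check_last_success: true under the old normalization but is now explicitly false:

# Old-style bare entry, plus the defaults _normalize_task layered on:
cfg = {"max_success_age_hours": 48}
item = "schedule.mailu_sync"
normalized = {
    "task": item,
    "check_last_success": True,  # old default
    "max_success_age_hours": cfg.get("max_success_age_hours", 48),
}

# New-style entry already spells everything out in config.yaml:
explicit = {"task": "schedule.mailu_sync", "check_last_success": False}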


@@ -1,5 +1,3 @@
-"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
 from __future__ import annotations
 import os
@@ -28,29 +26,11 @@ def _query(promql: str) -> list[dict]:
 def _expected_tasks() -> list[dict]:
     cfg = _load_config()
-    tasks = [
-        _normalize_task(item, cfg)
-        for item in cfg.get("ariadne_schedule_tasks", [])
-    ]
+    tasks = cfg.get("ariadne_schedule_tasks", [])
     assert tasks, "No Ariadne schedule tasks configured"
     return tasks
-def _normalize_task(item: object, cfg: dict) -> dict:
-    if isinstance(item, str):
-        return {
-            "task": item,
-            "check_last_success": True,
-            "max_success_age_hours": cfg.get("max_success_age_hours", 48),
-        }
-    if isinstance(item, dict):
-        normalized = dict(item)
-        normalized.setdefault("check_last_success", True)
-        normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
-        return normalized
-    raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
 def _tracked_tasks(tasks: list[dict]) -> list[dict]:
     tracked = [item for item in tasks if item.get("check_last_success")]
     assert tracked, "No Ariadne schedule tasks are marked for success tracking"


@@ -1,46 +0,0 @@
-from __future__ import annotations
-from datetime import datetime, timezone
-from pathlib import Path
-import yaml
-from kubernetes import client, config
-CONFIG_PATH = Path(__file__).with_name("config.yaml")
-def _load_config() -> dict:
-    with CONFIG_PATH.open("r", encoding="utf-8") as handle:
-        return yaml.safe_load(handle) or {}
-def _load_kube():
-    try:
-        config.load_incluster_config()
-    except config.ConfigException:
-        config.load_kube_config()
-def test_glue_cronjobs_recent_success():
-    cfg = _load_config()
-    max_age_hours = int(cfg.get("max_success_age_hours", 48))
-    allow_suspended = set(cfg.get("allow_suspended", []))
-    _load_kube()
-    batch = client.BatchV1Api()
-    cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items
-    assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"
-    now = datetime.now(timezone.utc)
-    for cronjob in cronjobs:
-        name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}"
-        if cronjob.spec.suspend:
-            assert name in allow_suspended, f"{name} is suspended but not in allow_suspended"
-            continue
-        last_success = cronjob.status.last_successful_time
-        assert last_success is not None, f"{name} has no lastSuccessfulTime"
-        age_hours = (now - last_success).total_seconds() / 3600
-        assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago"


@@ -1,5 +1,3 @@
-"""Glue checks for the metrics the quality-gate publishes."""
 from __future__ import annotations
 import os
@@ -27,29 +25,11 @@ def _query(promql: str) -> list[dict]:
 def _expected_tasks() -> list[dict]:
     cfg = _load_config()
-    tasks = [
-        _normalize_task(item, cfg)
-        for item in cfg.get("ariadne_schedule_tasks", [])
-    ]
+    tasks = cfg.get("ariadne_schedule_tasks", [])
     assert tasks, "No Ariadne schedule tasks configured"
     return tasks
-def _normalize_task(item: object, cfg: dict) -> dict:
-    if isinstance(item, str):
-        return {
-            "task": item,
-            "check_last_success": True,
-            "max_success_age_hours": cfg.get("max_success_age_hours", 48),
-        }
-    if isinstance(item, dict):
-        normalized = dict(item)
-        normalized.setdefault("check_last_success", True)
-        normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
-        return normalized
-    raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
 def _tracked_tasks(tasks: list[dict]) -> list[dict]:
     tracked = [item for item in tasks if item.get("check_last_success")]
     assert tracked, "No Ariadne schedule tasks are marked for success tracking"


@@ -1,401 +0,0 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@@ -13,14 +13,14 @@ spec:
   git:
     checkout:
       ref:
-        branch: main
+        branch: feature/ariadne
     commit:
       author:
         email: ops@bstein.dev
         name: flux-bot
       messageTemplate: "chore(bstein-dev-home): automated image update"
     push:
-      branch: main
+      branch: feature/ariadne
   update:
     strategy: Setters
     path: services/bstein-dev-home

View File

@@ -21,7 +21,6 @@ resources:
 - sui-metrics/kustomization.yaml
 - openldap/kustomization.yaml
 - keycloak/kustomization.yaml
-- quality/kustomization.yaml
 - oauth2-proxy/kustomization.yaml
 - mailu/kustomization.yaml
 - jenkins/kustomization.yaml

View File

@@ -1,35 +0,0 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: quality
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/quality
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: quality
  dependsOn:
    - name: traefik
    - name: cert-manager
    - name: keycloak
    - name: vault
    - name: postgres
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: sonarqube-exporter
      namespace: quality
    - apiVersion: apps/v1
      kind: Deployment
      name: oauth2-proxy-sonarqube
      namespace: quality
  wait: false
  timeout: 20m
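
Because the deleted Kustomization set prune: true, dropping it from the applications list above lets Flux garbage-collect the whole quality stack. A quick post-reconcile sanity check might look like this (standard flux/kubectl commands; the namespace comes from the deleted file):

# The quality entry should no longer be listed, and its Deployments should be pruned
flux get kustomizations -n flux-system
kubectl get deployments -n quality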

View File

@@ -13,14 +13,14 @@ spec:
   git:
     checkout:
       ref:
-        branch: main
+        branch: feature/ariadne
     commit:
       author:
         email: ops@bstein.dev
         name: flux-bot
       messageTemplate: "chore(maintenance): automated image update"
     push:
-      branch: main
+      branch: feature/ariadne
   update:
     strategy: Setters
     path: services/maintenance

View File

@@ -2,8 +2,4 @@ FROM python:3.11-slim
 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
-RUN pip install --no-cache-dir requests psycopg2-binary \
-    && groupadd --system guest-tools \
-    && useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
-USER guest-tools
+RUN pip install --no-cache-dir requests psycopg2-binary

View File

@@ -1,8 +1,16 @@
-# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
-FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
+FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
+FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
 ENV DATA_PREPPER_PATH=/usr/share/data-prepper
+RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
+    && mkdir -p /var/log/data-prepper
+COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
+RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
 USER 10001
 WORKDIR /usr/share/data-prepper
 CMD ["bin/data-prepper"]

View File

@ -1,13 +1,10 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20 FROM alpine:3.20
RUN apk add --no-cache ca-certificates \ RUN apk add --no-cache ca-certificates
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
COPY --from=base /lk-jwt-service /lk-jwt-service COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"] CMD ["/lk-jwt-service"]

View File

@@ -29,12 +29,10 @@ FROM ${DEBIAN_IMAGE}
 RUN set -eux; \
     apt-get update; \
     apt-get install -y --no-install-recommends ca-certificates; \
-    update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
-    groupadd --system p2pool; \
-    useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
+    update-ca-certificates; rm -rf /var/lib/apt/lists/*
 COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
 RUN /usr/local/bin/p2pool --version || true
 EXPOSE 3333
-USER p2pool
 ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@@ -26,12 +26,9 @@ RUN set -eux; \
     curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
     tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
     install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
-    rm -f /opt/monero/monero.tar.bz2; \
-    groupadd --system monero; \
-    useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
+    rm -f /opt/monero/monero.tar.bz2
 ENV PATH="/usr/local/bin:/usr/bin:/bin"
 RUN /usr/local/bin/monero-wallet-rpc --version || true
 EXPOSE 18083
-USER monero

View File

@@ -23,14 +23,10 @@ RUN set -eux; \
     mkdir -p /opt/monero; \
     tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
     rm -f /tmp/monero.tar.bz2; \
-    groupadd --system monero; \
-    useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
     mkdir -p /data; \
-    chown monero:monero /data; \
     chmod 0770 /data
 ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
     PATH="/opt/monero:${PATH}"
-USER monero
 CMD ["/opt/monero/monerod", "--version"]

View File

@@ -1,13 +1,10 @@
 FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
 FROM alpine:3.20
-RUN apk add --no-cache ca-certificates \
-    && addgroup -S oauth2-proxy \
-    && adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
+RUN apk add --no-cache ca-certificates
 COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
 COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
 RUN chmod 0755 /entrypoint.sh
-USER oauth2-proxy
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["/bin/oauth2-proxy"]

View File

@@ -1,13 +1,10 @@
 FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
 FROM alpine:3.20
-RUN apk add --no-cache ca-certificates \
-    && addgroup -S pegasus \
-    && adduser -S -D -H -u 65532 -G pegasus pegasus
+RUN apk add --no-cache ca-certificates
 COPY --from=base /pegasus /pegasus
 COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
 RUN chmod 0755 /entrypoint.sh
-USER pegasus
 ENTRYPOINT ["/entrypoint.sh"]
 CMD ["/pegasus"]

View File

@@ -1,48 +0,0 @@
# dockerfiles/Dockerfile.quality-tools
FROM debian:bookworm-slim
ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0
ENV TRIVY_CACHE_DIR=/opt/trivy-cache
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
git \
jq \
unzip \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd --system quality-tools \
&& useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
RUN set -eux; \
scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
printf '%s %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
unzip -q "/tmp/${scanner_zip}" -d /opt; \
ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
RUN set -eux; \
trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
curl -fsSL "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
rm -f "/tmp/${trivy_tgz}"; \
trivy --version; \
sonar-scanner -v
RUN set -eux; \
mkdir -p "${TRIVY_CACHE_DIR}"; \
trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
mkdir -p /workspace; \
chown quality-tools:quality-tools /workspace
WORKDIR /workspace
USER quality-tools

View File

@@ -33,36 +33,6 @@ spec:
         node-role.kubernetes.io/worker: "true"
       affinity:
         nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: atlas.bstein.dev/spillover
-                operator: DoesNotExist
-          - weight: 95
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values:
-                - titan-13
-                - titan-15
-                - titan-17
-                - titan-19
-          - weight: 90
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi5
-          - weight: 50
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi4
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
@@ -76,36 +46,6 @@ spec:
         node-role.kubernetes.io/worker: "true"
       affinity:
         nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: atlas.bstein.dev/spillover
-                operator: DoesNotExist
-          - weight: 95
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values:
-                - titan-13
-                - titan-15
-                - titan-17
-                - titan-19
-          - weight: 90
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi5
-          - weight: 50
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi4
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:
@@ -119,36 +59,6 @@ spec:
         node-role.kubernetes.io/worker: "true"
      affinity:
         nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: atlas.bstein.dev/spillover
-                operator: DoesNotExist
-          - weight: 95
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values:
-                - titan-13
-                - titan-15
-                - titan-17
-                - titan-19
-          - weight: 90
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi5
-          - weight: 50
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values:
-                - rpi4
           requiredDuringSchedulingIgnoredDuringExecution:
             nodeSelectorTerms:
             - matchExpressions:

View File

@@ -26,9 +26,6 @@ spec:
     cleanupOnFail: true
     timeout: 15m
   values:
-    global:
-      nodeSelector:
-        longhorn-host: "true"
     service:
       ui:
         type: NodePort
@@ -81,12 +78,3 @@ spec:
       tag: v2.16.0
     defaultSettings:
       systemManagedPodsImagePullPolicy: Always
-    longhornManager:
-      nodeSelector:
-        longhorn-host: "true"
-    longhornDriver:
-      nodeSelector:
-        longhorn-host: "true"
-    longhornUI:
-      nodeSelector:
-        longhorn-host: "true"

View File

@@ -2,11 +2,10 @@
 apiVersion: batch/v1
 kind: Job
 metadata:
-  name: longhorn-settings-ensure-7
+  name: longhorn-settings-ensure-4
   namespace: longhorn-system
 spec:
   backoffLimit: 0
-  activeDeadlineSeconds: 240
   ttlSecondsAfterFinished: 3600
   template:
     spec:

View File

@@ -4,12 +4,11 @@ set -eu
 # Longhorn blocks direct CR patches for some settings; use the internal API instead.
 api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
-curl_opts="-fsS --connect-timeout 3 --max-time 15"
 wait_for_api() {
   attempts=30
   while [ "${attempts}" -gt 0 ]; do
-    if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
+    if curl -fsS "${api_base}" >/dev/null 2>&1; then
       return 0
     fi
     attempts=$((attempts - 1))
@@ -23,14 +22,14 @@ update_setting() {
   name="$1"
   value="$2"
-  current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
+  current="$(curl -fsS "${api_base}/${name}" || true)"
   if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
     echo "Setting ${name} already set."
     return 0
   fi
   echo "Setting ${name} -> ${value}"
-  curl ${curl_opts} -X PUT \
+  curl -fsS -X PUT \
     -H "Content-Type: application/json" \
     -d "{\"value\":\"${value}\"}" \
     "${api_base}/${name}" >/dev/null
@@ -41,7 +40,3 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
 update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
 update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
 update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
-# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
-update_setting replica-auto-balance "best-effort"
-update_setting concurrent-replica-rebuild-per-node-limit "2"
-update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"
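
The trailing settings (replica auto-balance, rebuild limits, node-down pod deletion) disappear on the head side. If one ever needs re-applying by hand, the script's own PUT pattern works as a one-liner; the endpoint, setting name, and value below are copied verbatim from the removed lines:

curl -fsS -X PUT \
  -H "Content-Type: application/json" \
  -d '{"value":"best-effort"}' \
  "http://longhorn-backend.longhorn-system.svc:9500/v1/settings/replica-auto-balance"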

View File

@@ -13,27 +13,9 @@ spec:
     - objectName: "harbor-pull__dockerconfigjson"
       secretPath: "kv/data/atlas/shared/harbor-pull"
       secretKey: "dockerconfigjson"
-    - objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
-      secretPath: "kv/data/atlas/longhorn/backup-b2"
-      secretKey: "AWS_ACCESS_KEY_ID"
-    - objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
-      secretPath: "kv/data/atlas/longhorn/backup-b2"
-      secretKey: "AWS_SECRET_ACCESS_KEY"
-    - objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
-      secretPath: "kv/data/atlas/longhorn/backup-b2"
-      secretKey: "AWS_ENDPOINTS"
   secretObjects:
   - secretName: longhorn-registry
     type: kubernetes.io/dockerconfigjson
     data:
     - objectName: harbor-pull__dockerconfigjson
       key: .dockerconfigjson
-  - secretName: longhorn-backup-b2
-    type: Opaque
-    data:
-    - objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
-      key: AWS_ACCESS_KEY_ID
-    - objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
-      key: AWS_SECRET_ACCESS_KEY
-    - objectName: longhorn-backup-b2__AWS_ENDPOINTS
-      key: AWS_ENDPOINTS
View File

@@ -26,16 +26,6 @@ spec:
            - key: hardware
              operator: In
              values: ["rpi5", "rpi4"]
-          - weight: 90
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values:
-                - titan-13
-                - titan-15
-                - titan-17
-                - titan-19
       containers:
       - name: sync
         image: alpine:3.20

View File

@@ -70,38 +70,6 @@ items:
         dnsPolicy: ClusterFirst
         nodeSelector:
           node-role.kubernetes.io/worker: "true"
-        affinity:
-          nodeAffinity:
-            preferredDuringSchedulingIgnoredDuringExecution:
-            - weight: 100
-              preference:
-                matchExpressions:
-                - key: atlas.bstein.dev/spillover
-                  operator: DoesNotExist
-            - weight: 95
-              preference:
-                matchExpressions:
-                - key: kubernetes.io/hostname
-                  operator: NotIn
-                  values:
-                  - titan-13
-                  - titan-15
-                  - titan-17
-                  - titan-19
-            - weight: 90
-              preference:
-                matchExpressions:
-                - key: hardware
-                  operator: In
-                  values:
-                  - rpi5
-            - weight: 50
-              preference:
-                matchExpressions:
-                - key: hardware
-                  operator: In
-                  values:
-                  - rpi4
         restartPolicy: Always
         schedulerName: default-scheduler
         serviceAccount: atlas-traefik-ingress-controller

View File

@@ -41,12 +41,3 @@ spec:
       failurePolicy: Ignore
       nodeSelector:
         node-role.kubernetes.io/worker: "true"
-      affinity:
-        nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values: ["titan-13", "titan-15", "titan-17", "titan-19"]

File diff suppressed because it is too large.

View File

@@ -4,21 +4,13 @@ import pathlib
 def load_module():
     path = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
-    spec = importlib.util.spec_from_file_location("scripts.dashboards_render_atlas", path)
+    spec = importlib.util.spec_from_file_location("dashboards_render_atlas", path)
     module = importlib.util.module_from_spec(spec)
     assert spec.loader is not None
     spec.loader.exec_module(module)
     return module
-
-def flatten_panels(panels):
-    flat = []
-    for panel in panels:
-        flat.append(panel)
-        flat.extend(panel.get("panels", []))
-    return flat
-
 def test_table_panel_options_and_filterable():
     mod = load_module()
     panel = mod.table_panel(
@@ -64,71 +56,3 @@ def test_render_configmap_writes(tmp_path):
     content = (tmp_path / "cm.yaml").read_text()
     assert "kind: ConfigMap" in content
     assert f"{uid}.json" in content
-
-def test_testing_suite_variable_uses_canonical_values_only():
-    mod = load_module()
-    variable = mod.testing_suite_variable()
-    canonical_matcher = "|".join(mod.PLATFORM_TEST_SUITE_NAMES)
-    legacy_names = {"bstein-home", "data-prepper", "titan-iac", "pegasus-health"}
-    assert variable["allValue"] == canonical_matcher
-    assert not any(alias in variable["query"] for alias in legacy_names)
-    assert not any(alias in variable["allValue"] for alias in legacy_names)
-    assert [option["value"] for option in variable["options"]] == mod.PLATFORM_TEST_SUITE_NAMES
-
-def test_jobs_dashboard_separates_current_gate_health_from_reliability():
-    mod = load_module()
-    dashboard = mod.build_jobs_dashboard()
-    panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])}
-    assert "Current Gate Health by Suite" in panels_by_title
-    assert "Run Reliability by Suite (24h)" in panels_by_title
-    assert "Run Reliability History by Suite" in panels_by_title
-    assert "Failures by Suite (24h)" not in panels_by_title
-    assert "Success Rate by Suite (24h)" not in panels_by_title
-    current_gate_expr = panels_by_title["Current Gate Health by Suite"]["targets"][0]["expr"]
-    assert 'check)' in current_gate_expr
-    assert 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
-    reliability_panel = panels_by_title["Run Reliability by Suite (24h)"]
-    reliability_expr = reliability_panel["targets"][0]["expr"]
-    assert "platform_quality_gate_runs_total" in reliability_expr
-    assert "> 0" in reliability_expr
-    assert "- 1" in reliability_expr
-    assert reliability_panel["fieldConfig"]["defaults"]["mappings"] == [
-        {"type": "value", "options": {"-1": {"text": "no runs"}}}
-    ]
-
-def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
-    mod = load_module()
-    dashboard = mod.build_jobs_dashboard()
-    panels = dashboard["panels"]
-    rows = [panel for panel in panels if panel["type"] == "row"]
-    visible_query_panels = [panel for panel in panels if panel["type"] != "row"]
-    nested_panels_by_title = {
-        child["title"]: child
-        for row in rows
-        for child in row.get("panels", [])
-    }
-    assert len(panels) == 16
-    assert len(visible_query_panels) == 11
-    assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11
-    assert [row["title"] for row in rows] == [
-        "Reliability And Run History",
-        "Failure Trends By Check",
-        "Success Trends By Check",
-        "Test Drilldowns And Problem Tests",
-        "Telemetry Completeness, SonarQube, And Branches",
-    ]
-    assert all(row["collapsed"] for row in rows)
-    assert "Failure Trend: Coverage" in nested_panels_by_title
-    assert "Success Trend: Supply Chain" in nested_panels_by_title
-    assert "Selected Test Pass Rate History" in nested_panels_by_title
-    assert "Missing Coverage Metrics by Suite" in nested_panels_by_title
-    assert "SonarQube API Up" in nested_panels_by_title

View File

@@ -138,100 +138,6 @@ def test_kc_get_users_paginates(monkeypatch):
     assert sync.SESSION.calls == 1
-
-def test_kc_get_users_fetches_second_page_after_full_batch(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    class _PagedSession:
-        def __init__(self):
-            self.calls = 0
-            self.first_params = []
-        def get(self, *_, **kwargs):
-            self.calls += 1
-            self.first_params.append(kwargs["params"]["first"])
-            if self.calls == 1:
-                return _FakeResponse([{"id": f"u{i}"} for i in range(200)])
-            return _FakeResponse([{"id": "last"}])
-    sync.SESSION = _PagedSession()
-    users = sync.kc_get_users("tok")
-    assert len(users) == 201
-    assert sync.SESSION.first_params == [0, 200]
-
-def test_get_kc_token_posts_client_credentials(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    calls = []
-    class _TokenSession:
-        def post(self, url, data, timeout):
-            calls.append((url, data, timeout))
-            return _FakeResponse({"access_token": "tok"})
-    sync.SESSION = _TokenSession()
-    assert sync.get_kc_token() == "tok"
-    assert calls[0][1]["grant_type"] == "client_credentials"
-
-def test_retry_request_retries_then_succeeds(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    attempts = []
-    sleeps = []
-    def _flaky():
-        attempts.append(1)
-        if len(attempts) == 1:
-            raise sync.requests.RequestException("temporary")
-        return "ok"
-    monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
-    assert sync.retry_request("request", _flaky, attempts=2) == "ok"
-    assert sleeps == [2]
-
-def test_retry_request_reraises_final_error(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    monkeypatch.setattr(sync.time, "sleep", lambda seconds: None)
-    with pytest.raises(sync.requests.RequestException):
-        sync.retry_request(
-            "request",
-            lambda: (_ for _ in ()).throw(sync.requests.RequestException("nope")),
-            attempts=1,
-        )
-
-def test_retry_db_connect_retries_then_succeeds(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    attempts = []
-    sleeps = []
-    def _connect(**kwargs):
-        attempts.append(kwargs)
-        if len(attempts) == 1:
-            raise sync.psycopg2.Error("not yet")
-        return "conn"
-    monkeypatch.setattr(sync.psycopg2, "connect", _connect)
-    monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
-    assert sync.retry_db_connect(attempts=2) == "conn"
-    assert sleeps == [2]
-
-def test_retry_db_connect_reraises_final_error(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    monkeypatch.setattr(sync.psycopg2, "connect", lambda **kwargs: (_ for _ in ()).throw(sync.psycopg2.Error("down")))
-    monkeypatch.setattr(sync.time, "sleep", lambda seconds: None)
-    with pytest.raises(sync.psycopg2.Error):
-        sync.retry_db_connect(attempts=1)
-
 def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
     sync = load_sync_module(monkeypatch)
     executed = []
@@ -260,87 +166,6 @@ def test_ensure_mailu_user_upserts(monkeypatch):
     assert captured["password"] != "pw"
-
-def test_attribute_and_email_helpers(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    assert sync.get_attribute_value({"x": ["first", "second"]}, "x") == "first"
-    assert sync.get_attribute_value({"x": []}, "x") is None
-    assert sync.get_attribute_value({"x": "value"}, "x") == "value"
-    assert sync.mailu_enabled({"mailu_email": ["legacy@example.com"]}) is True
-    assert sync.mailu_enabled({"mailu_enabled": ["off"]}) is False
-    assert sync.resolve_mailu_email({"username": "fallback", "email": "user@example.com"}, {}) == "user@example.com"
-    assert sync.resolve_mailu_email({"username": "fallback", "email": "user@other.com"}, {}) == "fallback@example.com"
-
-def test_safe_update_payload_filters_fields(monkeypatch):
-    sync = load_sync_module(monkeypatch)
-    payload = sync._safe_update_payload(
-        {
-            "username": "user",
-            "enabled": True,
-            "email": "user@example.com",
-            "emailVerified": False,
-            "firstName": "User",
-            "lastName": "Example",
-            "requiredActions": ["UPDATE_PASSWORD", 7],
-            "attributes": "not-a-dict",
-            "ignored": "value",
-        }
-    )
-    assert payload == {
-        "username": "user",
-        "enabled": True,
-        "email": "user@example.com",
-        "emailVerified": False,
-        "firstName": "User",
-        "lastName": "Example",
-        "requiredActions": ["UPDATE_PASSWORD"],
-        "attributes": {},
-    }
-
-def test_ensure_system_mailboxes_handles_configurations(monkeypatch, capsys):
-    sync = load_sync_module(monkeypatch)
-    ensured = []
-    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", ["postmaster@example.com", "abuse"])
-    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "")
-    sync.ensure_system_mailboxes(object())
-    assert "MAILU_SYSTEM_PASSWORD is missing" in capsys.readouterr().out
-    def _ensure(cursor, email, password, display_name):
-        ensured.append((email, password, display_name))
-        if email == "abuse":
-            raise RuntimeError("boom")
-    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "pw")
-    monkeypatch.setattr(sync, "ensure_mailu_user", _ensure)
-    sync.ensure_system_mailboxes(object())
-    out = capsys.readouterr().out
-    assert ensured == [
-        ("postmaster@example.com", "pw", "postmaster"),
-        ("abuse", "pw", "abuse"),
-    ]
-    assert "Ensured system mailbox for postmaster@example.com" in out
-    assert "Failed to ensure system mailbox abuse" in out
-
-def test_main_exits_without_users_or_system_mailboxes(monkeypatch, capsys):
-    sync = load_sync_module(monkeypatch)
-    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", [])
-    monkeypatch.setattr(sync, "get_kc_token", lambda: "tok")
-    monkeypatch.setattr(sync, "kc_get_users", lambda token: [])
-    sync.main()
-    assert "No users found; exiting." in capsys.readouterr().out
-
 def test_main_generates_password_and_upserts(monkeypatch):
     sync = load_sync_module(monkeypatch)
     monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")

View File

@@ -1,134 +0,0 @@
import importlib.util
import io
import pathlib
import types

def load_listener_module(monkeypatch):
    monkeypatch.setenv("MAILU_SYNC_WAIT_TIMEOUT_SEC", "0")
    module_path = (
        pathlib.Path(__file__).resolve().parents[2]
        / "services"
        / "mailu"
        / "scripts"
        / "mailu_sync_listener.py"
    )
    spec = importlib.util.spec_from_file_location("mailu_sync_listener_testmod", module_path)
    module = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(module)
    return module

def _handler_for(listener, body):
    handler = listener.Handler.__new__(listener.Handler)
    raw = body if isinstance(body, bytes) else body.encode()
    handler.headers = {"Content-Length": str(len(raw))}
    handler.rfile = io.BytesIO(raw)
    handler.responses = []
    handler.headers_ended = 0
    handler.send_response = lambda code: handler.responses.append(code)
    handler.end_headers = lambda: setattr(handler, "headers_ended", handler.headers_ended + 1)
    return handler

def test_listener_run_sync_blocking_updates_state(monkeypatch):
    listener = load_listener_module(monkeypatch)
    monkeypatch.setattr(listener, "time", lambda: 42.0)
    monkeypatch.setattr(
        listener.subprocess,
        "run",
        lambda command, check: types.SimpleNamespace(returncode=3),
    )
    assert listener._run_sync_blocking() == 3
    assert listener.last_rc == 3
    assert listener.last_run == 42.0
    assert listener.sync_done.is_set()
    listener.sync_running = True
    assert listener._run_sync_blocking() == 0

def test_listener_trigger_sync_async_honors_running_and_debounce(monkeypatch):
    listener = load_listener_module(monkeypatch)
    starts = []
    class _Thread:
        def __init__(self, target, daemon):
            self.target = target
            self.daemon = daemon
        def start(self):
            starts.append((self.target, self.daemon))
    monkeypatch.setattr(listener.threading, "Thread", _Thread)
    monkeypatch.setattr(listener, "time", lambda: 100.0)
    listener.sync_running = True
    assert listener._trigger_sync_async() is False
    listener.sync_running = False
    listener.last_run = 95.0
    assert listener._trigger_sync_async() is False
    assert listener._trigger_sync_async(force=True) is True
    assert starts and starts[0][1] is True

def test_listener_post_rejects_invalid_json(monkeypatch):
    listener = load_listener_module(monkeypatch)
    handler = _handler_for(listener, b"{not-json")
    handler.do_POST()
    assert handler.responses == [400]
    assert handler.headers_ended == 1

def test_listener_post_triggers_async_without_wait(monkeypatch):
    listener = load_listener_module(monkeypatch)
    called = []
    monkeypatch.setattr(listener, "_trigger_sync_async", lambda force=False: called.append(force) or True)
    handler = _handler_for(listener, '{"force": true}')
    handler.do_POST()
    assert called == [True]
    assert handler.responses == [202]

def test_listener_post_wait_returns_success_or_failure(monkeypatch):
    listener = load_listener_module(monkeypatch)
    called = []
    monkeypatch.setattr(listener, "_trigger_sync_async", lambda force=False: called.append(force) or True)
    listener.sync_running = False
    listener.last_rc = 0
    handler = _handler_for(listener, '{"wait": true, "force": true}')
    handler.do_POST()
    assert called == [True]
    assert handler.responses == [200]
    listener.last_rc = 2
    handler = _handler_for(listener, '{"wait": true}')
    handler.do_POST()
    assert handler.responses == [500]

def test_listener_post_wait_keeps_running_request_successful(monkeypatch):
    listener = load_listener_module(monkeypatch)
    listener.sync_running = True
    handler = _handler_for(listener, '{"wait": true}')
    handler.do_POST()
    assert handler.responses == [200]

def test_listener_log_message_is_quiet(monkeypatch):
    listener = load_listener_module(monkeypatch)
    handler = listener.Handler.__new__(listener.Handler)
    assert handler.log_message("ignored %s", "value") is None

View File

@@ -1,73 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
MODE="${1:-dry-run}"
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
  echo "usage: $0 [dry-run|active]" >&2
  exit 2
fi
EXPECTED_DRY_RUN="true"
PROM_MODE="dry_run"
if [[ "$MODE" == "active" ]]; then
  EXPECTED_DRY_RUN="false"
  PROM_MODE="delete"
fi
KUSTOMIZATION="${KUSTOMIZATION:-maintenance}"
NAMESPACE="${NAMESPACE:-maintenance}"
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"
for cmd in flux kubectl curl grep awk; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "missing required command: $cmd" >&2
    exit 2
  fi
done
echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source
echo "[2/5] wait for deployment rollout"
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m
echo "[3/5] verify ariadne env wiring"
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"
echo "$ENV_DUMP" | grep -F "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_NAMESPACE=jenkins"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
echo "$ENV_DUMP" | grep -F "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"
echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
PF_LOG="$(mktemp)"
METRICS_FILE="$(mktemp)"
cleanup() {
  if [[ -n "${PF_PID:-}" ]]; then
    kill "$PF_PID" >/dev/null 2>&1 || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  rm -f "$PF_LOG" "$METRICS_FILE"
}
trap cleanup EXIT
kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" "${LOCAL_METRICS_PORT}:8080" >"$PF_LOG" 2>&1 &
PF_PID=$!
sleep 2
curl -fsS "http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics" >"$METRICS_FILE"
grep -F "# HELP ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"
grep -F "# HELP ariadne_jenkins_workspace_cleanup_objects_total" "$METRICS_FILE"
echo "[5/5] show recent cleanup signal"
if grep -q "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"; then
  grep "ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE" | grep "mode=\"${PROM_MODE}\"" || true
else
  echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
fi
echo "Recent cleanup logs (if any):"
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 | grep -i "jenkins workspace cleanup" | tail -n 20 || true
echo "verification complete for mode=${MODE}"

View File

@@ -5,7 +5,7 @@ metadata:
   name: ollama
   namespace: ai
 spec:
-  replicas: 0
+  replicas: 1
   revisionHistoryLimit: 2
   strategy:
     type: RollingUpdate
@@ -21,7 +21,7 @@ spec:
         app: ollama
       annotations:
         ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
-        ai.bstein.dev/gpu: GPU pool (titan-20/21)
+        ai.bstein.dev/gpu: GPU pool (titan-22/24)
         ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
     spec:
       affinity:
@@ -32,13 +32,13 @@ spec:
             - key: kubernetes.io/hostname
               operator: In
               values:
-              - titan-20
-              - titan-21
+              - titan-22
+              - titan-24
       runtimeClassName: nvidia
       volumes:
       - name: models
         persistentVolumeClaim:
-          claimName: ollama-models-asteria
+          claimName: ollama-models
       initContainers:
       - name: warm-model
         image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
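
With replicas back at 1 and the hostname affinity retargeted, the pod should land on titan-22 or titan-24. A quick scheduling check, assuming a standard kubectl setup and using the app=ollama label from the selector above:

# NODE column should show titan-22 or titan-24
kubectl -n ai get pods -l app=ollama -o wide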

View File

@@ -2,12 +2,12 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: ollama-models-asteria
+  name: ollama-models
   namespace: ai
 spec:
   accessModes:
-  - ReadWriteMany
+  - ReadWriteOnce
   resources:
     requests:
       storage: 30Gi
-  storageClassName: asteria
+  storageClassName: astreae

View File

@@ -49,15 +49,6 @@ spec:
       nodeSelector:
         kubernetes.io/arch: arm64
         node-role.kubernetes.io/worker: "true"
-      affinity:
-        nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values: ["titan-13", "titan-15", "titan-17", "titan-19"]
       imagePullSecrets:
       - name: harbor-regcred
       containers:

View File

@@ -38,36 +38,6 @@ spec:
       nodeSelector:
         kubernetes.io/arch: arm64
         node-role.kubernetes.io/worker: "true"
-      affinity:
-        nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-          - weight: 100
-            preference:
-              matchExpressions:
-              - key: atlas.bstein.dev/spillover
-                operator: DoesNotExist
-          - weight: 95
-            preference:
-              matchExpressions:
-              - key: kubernetes.io/hostname
-                operator: NotIn
-                values:
-                - titan-13
-                - titan-15
-                - titan-17
-                - titan-19
-          - weight: 90
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values: ["rpi5"]
-          - weight: 50
-            preference:
-              matchExpressions:
-              - key: hardware
-                operator: In
-                values: ["rpi4"]
       containers:
       - name: gateway
         image: python:3.11-slim

View File

@@ -26,7 +26,7 @@ spec:
         imagePullPolicy: Always
         ports:
         - name: http
-          containerPort: 8080
+          containerPort: 80
         readinessProbe:
           httpGet:
             path: /

View File

@@ -10,4 +10,4 @@ spec:
   ports:
   - name: http
     port: 80
-    targetPort: 8080
+    targetPort: 80

View File

@@ -15,14 +15,13 @@ resources:
 - frontend-service.yaml
 - backend-deployment.yaml
 - backend-service.yaml
-- vaultwarden-cred-sync-cronjob.yaml
 - oneoffs/portal-onboarding-e2e-test-job.yaml
 - ingress.yaml
 images:
 - name: registry.bstein.dev/bstein/bstein-dev-home-frontend
-  newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
+  newTag: 0.1.1-120 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
 - name: registry.bstein.dev/bstein/bstein-dev-home-backend
-  newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
+  newTag: 0.1.1-123 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
 configMapGenerator:
 - name: chat-ai-gateway
   namespace: bstein-dev-home
@@ -30,12 +29,6 @@ configMapGenerator:
   - gateway.py=scripts/gateway.py
   options:
     disableNameSuffixHash: true
-- name: vaultwarden-cred-sync-script
-  namespace: bstein-dev-home
-  files:
-  - vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
-  options:
-    disableNameSuffixHash: true
 - name: portal-onboarding-e2e-tests
   namespace: bstein-dev-home
   files:

View File

@@ -1,245 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations

import os
import sys
import time
from datetime import datetime, timezone
from typing import Any, Iterable

import httpx

from atlas_portal import settings
from atlas_portal.keycloak import admin_client
from atlas_portal.vaultwarden import invite_user

VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800"))
VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2"))

def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured")
    url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    first = 0
    while True:
        headers = _headers_with_retry(client)
        # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
        # brief representation which may omit these.
        params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"}
        payload = None
        for attempt in range(1, 6):
            try:
                with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
                    resp = http.get(url, params=params, headers=headers)
                    resp.raise_for_status()
                    payload = resp.json()
                break
            except httpx.HTTPError as exc:
                if attempt == 5:
                    raise
                time.sleep(attempt * 2)
        if not isinstance(payload, list) or not payload:
            return
        for item in payload:
            if isinstance(item, dict):
                yield item
        if len(payload) < page_size:
            return
        first += page_size

def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]:
    last_exc: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            return client.headers()
        except Exception as exc:
            last_exc = exc
            time.sleep(attempt * 2)
    if last_exc:
        raise last_exc
    raise RuntimeError("failed to fetch keycloak headers")

def _extract_attr(attrs: Any, key: str) -> str:
    if not isinstance(attrs, dict):
        return ""
    raw = attrs.get(key)
    if isinstance(raw, list):
        for item in raw:
            if isinstance(item, str) and item.strip():
                return item.strip()
        return ""
    if isinstance(raw, str) and raw.strip():
        return raw.strip()
    return ""

def _parse_synced_at(value: str) -> float | None:
    value = (value or "").strip()
    if not value:
        return None
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
        try:
            parsed = datetime.strptime(value, fmt)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed.timestamp()
        except ValueError:
            continue
    return None

def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
    username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
    username = username.strip()
    if not username:
        return ""
    attrs = user.get("attributes")
    vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR)
    if vaultwarden_email:
        return vaultwarden_email
    mailu_email = _extract_attr(attrs, "mailu_email")
    if mailu_email:
        return mailu_email
    email = (user.get("email") if isinstance(user.get("email"), str) else "") or ""
    email = email.strip()
    if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
        return email
    # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
    # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
    return ""

def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
    value = (value or "").strip()
    if not value:
        return
    existing = _extract_attr(user.get("attributes"), key)
    if existing:
        return
    admin_client().set_user_attribute(username, key, value)

def _set_user_attribute(username: str, key: str, value: str) -> None:
    value = (value or "").strip()
    if not value:
        return
    admin_client().set_user_attribute(username, key, value)

def main() -> int:
    processed = 0
    created = 0
    skipped = 0
    failures = 0
    consecutive_failures = 0
    for user in _iter_keycloak_users():
        username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
        username = username.strip()
        if not username:
            skipped += 1
            continue
        enabled = user.get("enabled")
        if enabled is False:
            skipped += 1
            continue
        if user.get("serviceAccountClientId") or username.startswith("service-account-"):
            skipped += 1
            continue
        # Fetch the full user payload so we can reliably read attributes (and skip re-invites).
        user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or ""
        user_id = user_id.strip()
        full_user = user
        if user_id:
            try:
                full_user = admin_client().get_user(user_id)
            except Exception:
                full_user = user
        current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
        current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
        current_synced_ts = _parse_synced_at(current_synced_at)
        if current_status in {"rate_limited", "error"} and current_synced_ts:
            if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC:
                skipped += 1
                continue
        email = _vaultwarden_email_for_user(full_user)
        if not email:
            print(f"skip {username}: missing email", file=sys.stderr)
            skipped += 1
            continue
        try:
            _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
        except Exception:
            pass
        # If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
        # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
        if current_status in {"invited", "already_present"}:
            if not current_synced_at:
                try:
                    _set_user_attribute(
                        username,
                        VAULTWARDEN_SYNCED_AT_ATTR,
                        time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                    )
                except Exception:
                    pass
            skipped += 1
            continue
        processed += 1
        result = invite_user(email)
        if result.ok:
            created += 1
            consecutive_failures = 0
            print(f"ok {username}: {result.status}")
            try:
                _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
                _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
            except Exception:
                pass
        else:
            failures += 1
            if result.status in {"rate_limited", "error"}:
                consecutive_failures += 1
            print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
            try:
                _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
                _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
            except Exception:
                pass
            if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT:
                print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr)
                break
    print(
        f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
        file=sys.stderr,
    )
    return 0 if failures == 0 else 2

if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -1,86 +0,0 @@
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vaultwarden-cred-sync
  namespace: bstein-dev-home
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/15 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "bstein-dev-home"
            vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
            vault.hashicorp.com/agent-inject-template-portal-env.sh: |
              {{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
              export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
              {{ end }}
              {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
              export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
              export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
              export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
              export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
              export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
        spec:
          serviceAccountName: bstein-dev-home
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          imagePullSecrets:
            - name: harbor-regcred
          containers:
            - name: sync
              image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95
              imagePullPolicy: Always
              command: ["/bin/sh", "-c"]
              args:
                - >-
                  . /vault/secrets/portal-env.sh
                  && exec python /scripts/vaultwarden_cred_sync.py
              env:
                - name: PYTHONPATH
                  value: /app
                - name: KEYCLOAK_ENABLED
                  value: "true"
                - name: KEYCLOAK_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_URL
                  value: http://keycloak.sso.svc.cluster.local
                - name: KEYCLOAK_ADMIN_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_CLIENT_ID
                  value: bstein-dev-home-admin
                - name: HTTP_CHECK_TIMEOUT_SEC
                  value: "20"
                - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
                  value: "900"
                - name: VAULTWARDEN_RETRY_COOLDOWN_SEC
                  value: "1800"
                - name: VAULTWARDEN_FAILURE_BAILOUT
                  value: "2"
              volumeMounts:
                - name: vaultwarden-cred-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: vaultwarden-cred-sync-script
              configMap:
                name: vaultwarden-cred-sync-script
                defaultMode: 0555

View File

@ -1,471 +0,0 @@
# services/comms/guest-name-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: guest-name-randomizer
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/1 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
nodeSelector:
hardware: rpi5
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
containers:
- name: rename
image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: MAS_ADMIN_CLIENT_ID
value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM
- name: MAS_ADMIN_CLIENT_SECRET_FILE
value: /vault/secrets/mas-admin-secret
- name: MAS_ADMIN_API_BASE
value: http://matrix-authentication-service:8081/api/admin/v1
- name: MAS_TOKEN_URL
value: http://matrix-authentication-service:8080/oauth2/token
- name: SEEDER_USER
value: othrys-seeder
- name: PGHOST
value: postgres-service.postgres.svc.cluster.local
- name: PGPORT
value: "5432"
- name: PGDATABASE
value: synapse
- name: PGUSER
value: synapse
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
python - <<'PY'
import base64
import os
import random
import requests
import time
import urllib.parse
import psycopg2
ADJ = [
"brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty",
"amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow",
"quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind",
]
NOUN = [
"otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit",
"breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak",
"pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr",
]
BASE = os.environ["SYNAPSE_BASE"]
MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"]
MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"]
MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/")
MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"]
SEEDER_USER = os.environ["SEEDER_USER"]
ROOM_ALIAS = "#othrys:live.bstein.dev"
SERVER_NAME = "live.bstein.dev"
STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000
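# Exchange the MAS admin client credentials for a short-lived admin token,
# retrying up to five times with exponential backoff on transient failures.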
def mas_admin_token():
with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f:
secret = f.read().strip()
basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode()
last_err = None
for attempt in range(5):
try:
r = requests.post(
MAS_TOKEN_URL,
headers={"Authorization": f"Basic {basic}"},
data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
timeout=30,
)
r.raise_for_status()
return r.json()["access_token"]
except Exception as exc: # noqa: BLE001
last_err = exc
time.sleep(2 ** attempt)
raise last_err
def mas_user_id(token, username):
r = requests.get(
f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}",
headers={"Authorization": f"Bearer {token}"},
timeout=30,
)
r.raise_for_status()
return r.json()["data"]["id"]
def mas_personal_session(token, user_id):
r = requests.post(
f"{MAS_ADMIN_API_BASE}/personal-sessions",
headers={"Authorization": f"Bearer {token}"},
json={
"actor_user_id": user_id,
"human_name": "guest-name-randomizer",
"scope": "urn:matrix:client:api:*",
"expires_in": 300,
},
timeout=30,
)
r.raise_for_status()
data = r.json().get("data", {}).get("attributes", {}) or {}
return data["access_token"], r.json()["data"]["id"]
def mas_revoke_session(token, session_id):
requests.post(
f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke",
headers={"Authorization": f"Bearer {token}"},
json={},
timeout=30,
)
def resolve_alias(token, alias):
headers = {"Authorization": f"Bearer {token}"}
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=headers)
r.raise_for_status()
return r.json()["room_id"]
def room_members(token, room_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members", headers=headers)
r.raise_for_status()
members = set()
existing_names = set()
for ev in r.json().get("chunk", []):
user_id = ev.get("state_key")
if user_id:
members.add(user_id)
disp = (ev.get("content") or {}).get("displayname")
if disp:
existing_names.add(disp)
return members, existing_names
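# Page through MAS users 100 at a time, following the cursor carried in the
# last item's page metadata.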
def mas_list_users(token):
headers = {"Authorization": f"Bearer {token}"}
users = []
cursor = None
while True:
url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100"
if cursor:
url += f"&page[after]={urllib.parse.quote(cursor)}"
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
data = r.json().get("data", [])
if not data:
break
users.extend(data)
cursor = data[-1].get("meta", {}).get("page", {}).get("cursor")
if not cursor:
break
return users
def synapse_list_users(token):
headers = {"Authorization": f"Bearer {token}"}
users = []
from_token = None
while True:
url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
if from_token:
url += f"&from={urllib.parse.quote(from_token)}"
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()
payload = r.json()
users.extend(payload.get("users", []))
from_token = payload.get("next_token")
if not from_token:
break
return users
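# A guest qualifies for pruning only when it reports a parseable last_seen_ts
# older than STALE_GUEST_MS (14 days).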
def should_prune_guest(entry, now_ms):
if not entry.get("is_guest"):
return False
last_seen = entry.get("last_seen_ts")
if last_seen is None:
return False
try:
last_seen = int(last_seen)
except (TypeError, ValueError):
return False
return now_ms - last_seen > STALE_GUEST_MS
def prune_guest(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
try:
r = requests.delete(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
params={"erase": "true"},
timeout=30,
)
except Exception as exc: # noqa: BLE001
print(f"guest prune failed for {user_id}: {exc}")
return False
if r.status_code in (200, 202, 204, 404):
return True
print(f"guest prune failed for {user_id}: {r.status_code} {r.text}")
return False
def user_id_for_username(username):
return f"@{username}:live.bstein.dev"
def get_displayname(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}", headers=headers)
r.raise_for_status()
return r.json().get("displayname")
def get_displayname_admin(token, user_id):
headers = {"Authorization": f"Bearer {token}"}
r = requests.get(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
timeout=30,
)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json().get("displayname")
def set_displayname(token, room_id, user_id, name, in_room):
headers = {"Authorization": f"Bearer {token}"}
payload = {"displayname": name}
r = requests.put(
f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname",
headers=headers,
json=payload,
timeout=30,
)
r.raise_for_status()
if not in_room:
return
state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}"
content = {"membership": "join", "displayname": name}
requests.put(state_url, headers=headers, json=content, timeout=30)
def set_displayname_admin(token, user_id, name):
headers = {"Authorization": f"Bearer {token}"}
payload = {"displayname": name}
r = requests.put(
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
headers=headers,
json=payload,
timeout=30,
)
if r.status_code in (200, 201, 204):
return True
return False
def needs_rename_username(username):
return username.isdigit() or username.startswith("guest-")
def needs_rename_display(display):
return not display or display.isdigit() or display.startswith("guest-")
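# Fallback for numeric legacy accounts with no MAS record: rewrite (or insert)
# their displayname rows directly in the Synapse profiles table.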
def db_rename_numeric(existing_names):
profile_rows = []
profile_index = {}
users = []
conn = psycopg2.connect(
host=os.environ["PGHOST"],
port=int(os.environ["PGPORT"]),
dbname=os.environ["PGDATABASE"],
user=os.environ["PGUSER"],
password=os.environ["PGPASSWORD"],
)
try:
with conn:
with conn.cursor() as cur:
cur.execute(
"SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
(f"^@\\d+:{SERVER_NAME}$",),
)
profile_rows = cur.fetchall()
profile_index = {row[1]: row for row in profile_rows}
for user_id, full_user_id, display in profile_rows:
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing_names:
new = candidate
existing_names.add(candidate)
break
if not new:
continue
cur.execute(
"UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
(new, full_user_id),
)
cur.execute(
"SELECT name FROM users WHERE name ~ %s",
(f"^@\\d+:{SERVER_NAME}$",),
)
users = [row[0] for row in cur.fetchall()]
if not users:
return
cur.execute(
"SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
(users,),
)
for existing_full in cur.fetchall():
profile_index.setdefault(existing_full[1], existing_full)
for full_user_id in users:
if full_user_id in profile_index:
continue
localpart = full_user_id.split(":", 1)[0].lstrip("@")
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing_names:
new = candidate
existing_names.add(candidate)
break
if not new:
continue
cur.execute(
"INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
"ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
(localpart, new, full_user_id),
)
finally:
conn.close()
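# Main flow: mint a short-lived seeder session, rename MAS-managed users,
# prune or rename legacy Synapse users, then sweep the database for leftovers.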
admin_token = mas_admin_token()
seeder_id = mas_user_id(admin_token, SEEDER_USER)
seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id)
try:
room_id = resolve_alias(seeder_token, ROOM_ALIAS)
members, existing = room_members(seeder_token, room_id)
users = mas_list_users(admin_token)
mas_usernames = set()
for user in users:
attrs = user.get("attributes") or {}
username = attrs.get("username") or ""
if username:
mas_usernames.add(username)
legacy_guest = attrs.get("legacy_guest")
if not username:
continue
if not (legacy_guest or needs_rename_username(username)):
continue
user_id = user_id_for_username(username)
access_token, session_id = mas_personal_session(admin_token, user["id"])
try:
display = get_displayname(access_token, user_id)
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing:
new = candidate
existing.add(candidate)
break
if not new:
continue
set_displayname(access_token, room_id, user_id, new, user_id in members)
finally:
mas_revoke_session(admin_token, session_id)
try:
entries = synapse_list_users(seeder_token)
except Exception as exc: # noqa: BLE001
print(f"synapse admin list skipped: {exc}")
entries = []
now_ms = int(time.time() * 1000)
for entry in entries:
user_id = entry.get("name") or ""
if not user_id.startswith("@"):
continue
localpart = user_id.split(":", 1)[0].lstrip("@")
if localpart in mas_usernames:
continue
is_guest = entry.get("is_guest")
if is_guest and should_prune_guest(entry, now_ms):
if prune_guest(seeder_token, user_id):
continue
if not (is_guest or needs_rename_username(localpart)):
continue
display = get_displayname_admin(seeder_token, user_id)
if display and not needs_rename_display(display):
continue
new = None
for _ in range(30):
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
if candidate not in existing:
new = candidate
existing.add(candidate)
break
if not new:
continue
if not set_displayname_admin(seeder_token, user_id, new):
continue
db_rename_numeric(existing)
finally:
mas_revoke_session(admin_token, seeder_session)
PY


@@ -34,11 +34,7 @@ resources:
   - livekit-token-deployment.yaml
   - livekit.yaml
   - coturn.yaml
-  - seed-othrys-room.yaml
-  - guest-name-job.yaml
   - oneoffs/othrys-kick-numeric-job.yaml
-  - pin-othrys-job.yaml
-  - reset-othrys-room-job.yaml
   - oneoffs/bstein-force-leave-job.yaml
   - livekit-ingress.yaml
   - livekit-middlewares.yaml


@@ -1,169 +0,0 @@
# services/comms/pin-othrys-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: pin-othrys-invite
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/30 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: pin
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os, requests, urllib.parse
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
ROOM_ALIAS = "#othrys:live.bstein.dev"
MESSAGE = (
"Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join "
"and choose 'Continue' -> 'Join as guest'."
)
def auth(token): return {"Authorization": f"Bearer {token}"}
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:live.bstein.dev"
def login(user, password):
r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
})
r.raise_for_status()
return r.json()["access_token"]
def resolve(alias, token):
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
r.raise_for_status()
return r.json()["room_id"]
def get_pinned(room_id, token):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
headers=auth(token),
)
if r.status_code == 404:
return []
r.raise_for_status()
return r.json().get("pinned", [])
def get_event(room_id, event_id, token):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
headers=auth(token),
)
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()
def send(room_id, token, body):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
headers=auth(token),
json={"msgtype": "m.text", "body": body},
)
r.raise_for_status()
return r.json()["event_id"]
def pin(room_id, token, event_id):
r = requests.put(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
headers=auth(token),
json={"pinned": [event_id]},
)
r.raise_for_status()
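# Idempotency: exit quietly if the invite message is already pinned.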
token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
room_id = resolve(ROOM_ALIAS, token)
for event_id in get_pinned(room_id, token):
ev = get_event(room_id, event_id, token)
if ev and ev.get("content", {}).get("body") == MESSAGE:
raise SystemExit(0)
eid = send(room_id, token, MESSAGE)
pin(room_id, token, eid)
PY
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555


@@ -1,312 +0,0 @@
# services/comms/reset-othrys-room-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: othrys-room-reset
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 0 1 1 *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: reset
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SERVER_NAME
value: live.bstein.dev
- name: ROOM_ALIAS
value: "#othrys:live.bstein.dev"
- name: ROOM_NAME
value: Othrys
- name: PIN_MESSAGE
value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'."
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os
import time
import urllib.parse
import requests
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev")
ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev")
ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys")
PIN_MESSAGE = os.environ["PIN_MESSAGE"]
SEEDER_USER = os.environ["SEEDER_USER"]
SEEDER_PASS = os.environ["SEEDER_PASS"]
BOT_USER = os.environ["BOT_USER"]
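# Power levels for the replacement room: only the seeder holds PL 100; event
# and user defaults stay 0 so guests can post, while state changes need PL 50+.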
POWER_LEVELS = {
"ban": 50,
"events": {
"m.room.avatar": 50,
"m.room.canonical_alias": 50,
"m.room.encryption": 100,
"m.room.history_visibility": 100,
"m.room.name": 50,
"m.room.power_levels": 100,
"m.room.server_acl": 100,
"m.room.tombstone": 100,
},
"events_default": 0,
"historical": 100,
"invite": 50,
"kick": 50,
"m.call.invite": 50,
"redact": 50,
"state_default": 50,
"users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100},
"users_default": 0,
}
def auth(token):
return {"Authorization": f"Bearer {token}"}
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:{SERVER_NAME}"
def login(user, password):
r = requests.post(
f"{AUTH_BASE}/_matrix/client/v3/login",
json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
},
)
if r.status_code != 200:
raise SystemExit(f"login failed: {r.status_code} {r.text}")
return r.json()["access_token"]
def resolve_alias(token, alias):
enc = urllib.parse.quote(alias)
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
if r.status_code == 404:
return None
r.raise_for_status()
return r.json()["room_id"]
def create_room(token):
r = requests.post(
f"{BASE}/_matrix/client/v3/createRoom",
headers=auth(token),
json={
"preset": "public_chat",
"name": ROOM_NAME,
"room_version": "11",
},
)
r.raise_for_status()
return r.json()["room_id"]
def put_state(token, room_id, ev_type, content):
r = requests.put(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
headers=auth(token),
json=content,
)
r.raise_for_status()
def set_directory_visibility(token, room_id, visibility):
r = requests.put(
f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
headers=auth(token),
json={"visibility": visibility},
)
r.raise_for_status()
def delete_alias(token, alias):
enc = urllib.parse.quote(alias)
r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
if r.status_code in (200, 202, 404):
return
r.raise_for_status()
def put_alias(token, alias, room_id):
enc = urllib.parse.quote(alias)
r = requests.put(
f"{BASE}/_matrix/client/v3/directory/room/{enc}",
headers=auth(token),
json={"room_id": room_id},
)
r.raise_for_status()
def list_joined_members(token, room_id):
r = requests.get(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
headers=auth(token),
)
r.raise_for_status()
members = []
for ev in r.json().get("chunk", []):
if ev.get("type") != "m.room.member":
continue
uid = ev.get("state_key")
if not isinstance(uid, str) or not uid.startswith("@"):
continue
members.append(uid)
return members
def invite_user(token, room_id, user_id):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
headers=auth(token),
json={"user_id": user_id},
)
if r.status_code in (200, 202):
return
r.raise_for_status()
def send_message(token, room_id, body):
r = requests.post(
f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
headers=auth(token),
json={"msgtype": "m.text", "body": body},
)
r.raise_for_status()
return r.json()["event_id"]
def login_with_retry():
last = None
for attempt in range(1, 6):
try:
return login(SEEDER_USER, SEEDER_PASS)
except Exception as exc: # noqa: BLE001
last = exc
time.sleep(attempt * 2)
raise last
token = login_with_retry()
old_room_id = resolve_alias(token, ROOM_ALIAS)
if not old_room_id:
raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed")
new_room_id = create_room(token)
# Configure the new room.
put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"})
put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS)
# Move the alias.
delete_alias(token, ROOM_ALIAS)
put_alias(token, ROOM_ALIAS, new_room_id)
put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS})
set_directory_visibility(token, new_room_id, "public")
# Invite the bot and all joined members of the old room.
bot_user_id = f"@{BOT_USER}:{SERVER_NAME}"
invite_user(token, new_room_id, bot_user_id)
for uid in list_joined_members(token, old_room_id):
if uid == f"@{SEEDER_USER}:{SERVER_NAME}":
continue
localpart = uid.split(":", 1)[0].lstrip("@")
if localpart.isdigit():
continue
invite_user(token, new_room_id, uid)
# Pin the guest invite message in the new room.
event_id = send_message(token, new_room_id, PIN_MESSAGE)
put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})
# De-list and tombstone the old room.
set_directory_visibility(token, old_room_id, "private")
put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
put_state(
token,
old_room_id,
"m.room.tombstone",
{"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id},
)
send_message(
token,
old_room_id,
"Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
)
print(f"old_room_id={old_room_id}")
print(f"new_room_id={new_room_id}")
PY
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555


@@ -1,185 +0,0 @@
# services/comms/seed-othrys-room.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: seed-othrys-room
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/10 * * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: seed
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests pyyaml >/dev/null
python - <<'PY'
import os, requests, urllib.parse
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
def canon_user(user):
u = (user or "").strip()
if u.startswith("@") and ":" in u:
return u
u = u.lstrip("@")
if ":" in u:
return f"@{u}"
return f"@{u}:live.bstein.dev"
def login(user, password):
r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
"type": "m.login.password",
"identifier": {"type": "m.id.user", "user": canon_user(user)},
"password": password,
})
if r.status_code != 200:
raise SystemExit(f"login failed: {r.status_code} {r.text}")
return r.json()["access_token"]
def ensure_user(token, localpart, password, admin):
headers = {"Authorization": f"Bearer {token}"}
user_id = f"@{localpart}:live.bstein.dev"
url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
res = requests.get(url, headers=headers)
if res.status_code == 200:
return
payload = {"password": password, "admin": admin, "deactivated": False}
create = requests.put(url, headers=headers, json=payload)
if create.status_code not in (200, 201):
raise SystemExit(f"create user {user_id} failed: {create.status_code} {create.text}")
def ensure_room(token):
headers = {"Authorization": f"Bearer {token}"}
alias = "#othrys:live.bstein.dev"
alias_enc = "%23othrys%3Alive.bstein.dev"
exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
if exists.status_code == 200:
room_id = exists.json()["room_id"]
else:
create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={
"preset": "public_chat",
"name": "Othrys",
"room_alias_name": "othrys",
"initial_state": [],
"power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50},
})
if create.status_code not in (200, 409):
raise SystemExit(f"create room failed: {create.status_code} {create.text}")
exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
room_id = exists.json()["room_id"]
state_events = [
("m.room.join_rules", {"join_rule": "public"}),
("m.room.guest_access", {"guest_access": "can_join"}),
("m.room.history_visibility", {"history_visibility": "shared"}),
("m.room.canonical_alias", {"alias": alias}),
]
for ev_type, content in state_events:
requests.put(f"{BASE}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}", headers=headers, json=content)
requests.put(f"{BASE}/_matrix/client/v3/directory/list/room/{room_id}", headers=headers, json={"visibility": "public"})
return room_id
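# The Synapse admin join API adds a local user to the room without needing
# that user's own access token.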
def join_user(token, room_id, user_id):
headers = {"Authorization": f"Bearer {token}"}
requests.post(f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", headers=headers, json={"user_id": user_id})
def join_all_locals(token, room_id):
headers = {"Authorization": f"Bearer {token}"}
users = []
from_token = None
while True:
url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
if from_token:
url += f"&from={from_token}"
res = requests.get(url, headers=headers).json()
users.extend([u["name"] for u in res.get("users", [])])
from_token = res.get("next_token")
if not from_token:
break
for uid in users:
join_user(token, room_id, uid)
token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True)
ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False)
room_id = ensure_room(token)
join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev")
join_all_locals(token, room_id)
PY
volumeMounts:
- name: synapse-config
mountPath: /config
readOnly: true
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: synapse-config
secret:
secretName: othrys-synapse-matrix-synapse
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555


@@ -1,56 +0,0 @@
# services/finance/firefly-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-cron
namespace: finance
spec:
schedule: "0 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-cron-token: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.STATIC_CRON_TOKEN }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: cron
image: curlimages/curl:8.5.0
command: ["/bin/sh", "-c"]
args:
- |
set -eu
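# Trigger Firefly III's built-in cron endpoint using the static token injected by Vault.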
token="$(cat /vault/secrets/firefly-cron-token)"
curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}"


@@ -1,92 +0,0 @@
# services/finance/firefly-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-user-sync
namespace: finance
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 6 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-env.sh: |
{{ with secret "kv/data/atlas/finance/firefly-db" }}
export DB_CONNECTION="pgsql"
export DB_HOST="{{ .Data.data.DB_HOST }}"
export DB_PORT="{{ .Data.data.DB_PORT }}"
export DB_DATABASE="{{ .Data.data.DB_DATABASE }}"
export DB_USERNAME="{{ .Data.data.DB_USERNAME }}"
export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/finance/firefly-secrets" }}
export APP_KEY="$(cat /vault/secrets/firefly-app-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-db-password: |
{{- with secret "kv/data/atlas/finance/firefly-db" -}}
{{ .Data.data.DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-app-key: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.APP_KEY }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: fireflyiii/core:version-6.4.15
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/firefly-env.sh
exec php /scripts/firefly_user_sync.php
env:
- name: APP_ENV
value: production
- name: APP_DEBUG
value: "false"
- name: TZ
value: Etc/UTC
volumeMounts:
- name: firefly-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: firefly-user-sync-script
configMap:
name: firefly-user-sync-script
defaultMode: 0555


@@ -12,8 +12,6 @@ resources:
   - oneoffs/finance-secrets-ensure-job.yaml
   - actual-budget-deployment.yaml
   - firefly-deployment.yaml
-  - firefly-user-sync-cronjob.yaml
-  - firefly-cronjob.yaml
   - actual-budget-service.yaml
   - firefly-service.yaml
   - actual-budget-ingress.yaml
@@ -24,9 +22,6 @@ configMapGenerator:
   - name: actual-openid-bootstrap-script
     files:
       - actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs
-  - name: firefly-user-sync-script
-    files:
-      - firefly_user_sync.php=scripts/firefly_user_sync.php
   - name: finance-secrets-ensure-script
     files:
       - finance_secrets_ensure.py=scripts/finance_secrets_ensure.py


@@ -1,114 +0,0 @@
#!/usr/bin/env php
<?php
declare(strict_types=1);
use FireflyIII\Console\Commands\Correction\CreatesGroupMemberships;
use FireflyIII\Models\Role;
use FireflyIII\Repositories\User\UserRepositoryInterface;
use FireflyIII\Support\Facades\FireflyConfig;
use FireflyIII\User;
use Illuminate\Contracts\Console\Kernel as ConsoleKernel;
function log_line(string $message): void
{
fwrite(STDOUT, $message . PHP_EOL);
}
function error_line(string $message): void
{
fwrite(STDERR, $message . PHP_EOL);
}
function find_app_root(): string
{
$candidates = [];
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
if ($env_root !== '') {
$candidates[] = $env_root;
}
$candidates[] = '/var/www/html';
$candidates[] = '/var/www/firefly-iii';
$candidates[] = '/app';
foreach ($candidates as $candidate) {
if (!is_dir($candidate)) {
continue;
}
if (file_exists($candidate . '/vendor/autoload.php')) {
return $candidate;
}
}
return '';
}
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
$password = (string) getenv('FIREFLY_USER_PASSWORD');
if ($email === '' || $password === '') {
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
exit(1);
}
$root = find_app_root();
if ($root === '') {
error_line('firefly app root not found');
exit(1);
}
$autoload = $root . '/vendor/autoload.php';
$app_bootstrap = $root . '/bootstrap/app.php';
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
error_line('firefly bootstrap files missing');
exit(1);
}
require $autoload;
$app = require $app_bootstrap;
$kernel = $app->make(ConsoleKernel::class);
$kernel->bootstrap();
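// Enforce single-user mode up front; a failure here is logged to stderr but does not abort the sync.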
try {
FireflyConfig::set('single_user_mode', true);
} catch (Throwable $exc) {
error_line('failed to enforce single_user_mode: '.$exc->getMessage());
}
$repository = $app->make(UserRepositoryInterface::class);
$existing_user = User::where('email', $email)->first();
$first_user = User::count() == 0;
if (!$existing_user) {
$existing_user = User::create(
[
'email' => $email,
'password' => bcrypt($password),
'blocked' => false,
'blocked_code' => null,
]
);
if ($first_user) {
$role = Role::where('name', 'owner')->first();
if ($role) {
$existing_user->roles()->attach($role);
}
}
log_line(sprintf('created firefly user %s', $email));
} else {
log_line(sprintf('updating firefly user %s', $email));
}
$existing_user->blocked = false;
$existing_user->blocked_code = null;
$existing_user->save();
$repository->changePassword($existing_user, $password);
CreatesGroupMemberships::createGroupMembership($existing_user);
log_line('firefly user sync complete');


@@ -53,7 +53,7 @@ spec:
     registry:
       existingClaim: harbor-registry
       accessMode: ReadWriteOnce
-      size: 100Gi
+      size: 50Gi
     jobservice:
       jobLog:
         existingClaim: harbor-jobservice-logs
@@ -77,7 +77,6 @@ spec:
     internal:
       nodeSelector:
         ananke.bstein.dev/harbor-bootstrap: "true"
-        kubernetes.io/hostname: titan-11
       image:
         repository: registry.bstein.dev/infra/harbor-redis
         tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-redis:tag"}
@@ -114,7 +113,6 @@ spec:
   core:
     nodeSelector:
       ananke.bstein.dev/harbor-bootstrap: "true"
-      kubernetes.io/hostname: titan-11
     image:
       repository: registry.bstein.dev/infra/harbor-core
       tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-core:tag"}
@@ -127,10 +125,6 @@ spec:
     podAnnotations:
       vault.hashicorp.com/agent-inject: "true"
       vault.hashicorp.com/role: "harbor"
-      vault.hashicorp.com/agent-requests-cpu: "25m"
-      vault.hashicorp.com/agent-limits-cpu: "100m"
-      vault.hashicorp.com/agent-requests-mem: "32Mi"
-      vault.hashicorp.com/agent-limits-mem: "128Mi"
       vault.hashicorp.com/agent-inject-secret-harbor-core-env.sh: "kv/data/atlas/harbor/harbor-core"
       vault.hashicorp.com/agent-inject-template-harbor-core-env.sh: |
         {{ with secret "kv/data/atlas/harbor/harbor-core" }}
@@ -180,7 +174,6 @@ spec:
   jobservice:
     nodeSelector:
       ananke.bstein.dev/harbor-bootstrap: "true"
-      kubernetes.io/hostname: titan-11
     image:
       repository: registry.bstein.dev/infra/harbor-jobservice
       tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-jobservice:tag"}
@@ -190,10 +183,6 @@ spec:
     podAnnotations:
       vault.hashicorp.com/agent-inject: "true"
       vault.hashicorp.com/role: "harbor"
-      vault.hashicorp.com/agent-requests-cpu: "25m"
-      vault.hashicorp.com/agent-limits-cpu: "100m"
-      vault.hashicorp.com/agent-requests-mem: "32Mi"
-      vault.hashicorp.com/agent-limits-mem: "128Mi"
       vault.hashicorp.com/agent-inject-secret-harbor-jobservice-env.sh: "kv/data/atlas/harbor/harbor-jobservice"
      vault.hashicorp.com/agent-inject-template-harbor-jobservice-env.sh: |
         {{ with secret "kv/data/atlas/harbor/harbor-core" }}
@@ -227,7 +216,6 @@ spec:
   portal:
     nodeSelector:
       ananke.bstein.dev/harbor-bootstrap: "true"
-      kubernetes.io/hostname: titan-11
     image:
       repository: registry.bstein.dev/infra/harbor-portal
       tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-portal:tag"}
@@ -255,7 +243,6 @@ spec:
   registry:
     nodeSelector:
       ananke.bstein.dev/harbor-bootstrap: "true"
-      kubernetes.io/hostname: titan-11
     registry:
       image:
         repository: registry.bstein.dev/infra/harbor-registry
@@ -283,10 +270,6 @@ spec:
     podAnnotations:
       vault.hashicorp.com/agent-inject: "true"
       vault.hashicorp.com/role: "harbor"
-      vault.hashicorp.com/agent-requests-cpu: "25m"
-      vault.hashicorp.com/agent-limits-cpu: "100m"
-      vault.hashicorp.com/agent-requests-mem: "32Mi"
-      vault.hashicorp.com/agent-limits-mem: "128Mi"
       vault.hashicorp.com/agent-inject-secret-harbor-registry-env.sh: "kv/data/atlas/harbor/harbor-registry"
       vault.hashicorp.com/agent-inject-template-harbor-registry-env.sh: |
         {{ with secret "kv/data/atlas/harbor/harbor-registry" }}
@@ -338,7 +321,6 @@ spec:
   nginx:
     nodeSelector:
       ananke.bstein.dev/harbor-bootstrap: "true"
-      kubernetes.io/hostname: titan-11
     image:
       repository: registry.bstein.dev/infra/harbor-nginx
       tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-nginx:tag"}


@@ -8,7 +8,7 @@ spec:
   accessModes: [ "ReadWriteOnce" ]
   resources:
     requests:
-      storage: 100Gi
+      storage: 50Gi
   storageClassName: astreae
 ---
 apiVersion: v1


@@ -8,18 +8,8 @@ resources:
   - portal-rbac.yaml
   - wger-media-pvc.yaml
   - wger-static-pvc.yaml
-  - wger-admin-ensure-cronjob.yaml
-  - wger-user-sync-cronjob.yaml
   - wger-deployment.yaml
   - wger-service.yaml
   - wger-ingress.yaml
 generatorOptions:
   disableNameSuffixHash: true
-configMapGenerator:
-  - name: wger-nginx-config
-    files:
-      - default.conf=config/nginx.conf
-      - nginx.conf=config/nginx-main.conf
-  - name: wger-user-sync-script
-    files:
-      - wger_user_sync.py=scripts/wger_user_sync.py


@@ -1,120 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import sys
import django
def _env(name: str, default: str = "") -> str:
value = os.getenv(name, default)
return value.strip() if isinstance(value, str) else ""
def _setup_django() -> None:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
django.setup()
def _set_default_gym(user) -> None:
try:
from wger.gym.models import GymConfig
except Exception:
return
try:
config = GymConfig.objects.first()
except Exception:
return
if not config or not getattr(config, "default_gym", None):
return
profile = getattr(user, "userprofile", None)
if not profile or getattr(profile, "gym", None):
return
profile.gym = config.default_gym
profile.save()
def _ensure_profile(user) -> None:
profile = getattr(user, "userprofile", None)
if not profile:
return
if hasattr(profile, "email_verified") and not profile.email_verified:
profile.email_verified = True
if hasattr(profile, "is_temporary") and profile.is_temporary:
profile.is_temporary = False
profile.save()
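# Ensure the admin account exists, is active and staff, and always carries the
# password injected from Vault; profile flags and default gym are normalized too.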
def _ensure_admin(username: str, password: str, email: str) -> None:
from django.contrib.auth.models import User
if not username or not password:
raise RuntimeError("admin username/password missing")
user, created = User.objects.get_or_create(username=username)
if created:
user.is_active = True
if not user.is_staff:
user.is_staff = True
if email:
user.email = email
user.set_password(password)
user.save()
_ensure_profile(user)
_set_default_gym(user)
print(f"ensured admin user {username}")
def _ensure_user(username: str, password: str, email: str) -> None:
from django.contrib.auth.models import User
if not username or not password:
raise RuntimeError("username/password missing")
user, created = User.objects.get_or_create(username=username)
if created:
user.is_active = True
if email and user.email != email:
user.email = email
user.set_password(password)
user.save()
_ensure_profile(user)
_set_default_gym(user)
action = "created" if created else "updated"
print(f"{action} user {username}")
def main() -> int:
admin_user = _env("WGER_ADMIN_USERNAME")
admin_password = _env("WGER_ADMIN_PASSWORD")
admin_email = _env("WGER_ADMIN_EMAIL")
username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
password = _env("WGER_PASSWORD")
email = _env("WGER_EMAIL")
if not any([admin_user and admin_password, username and password]):
print("no admin or user payload provided; exiting")
return 0
_setup_django()
if admin_user and admin_password:
_ensure_admin(admin_user, admin_password, admin_email)
if username and password:
_ensure_user(username, password, email)
return 0
if __name__ == "__main__":
sys.exit(main())


@@ -1,120 +0,0 @@
# services/health/wger-admin-ensure-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-admin-ensure
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "15 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-admin" }}
export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)"
export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-username: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.username }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-password: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.password }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: ensure
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555


@@ -1,106 +0,0 @@
# services/health/wger-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-user-sync
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555

View File

@ -77,26 +77,23 @@ spec:
            mountPath: /config
      affinity:
        nodeAffinity:
-         requiredDuringSchedulingIgnoredDuringExecution:
-           nodeSelectorTerms:
-             - matchExpressions:
-                 - key: longhorn-host
-                   operator: In
-                   values:
-                     - "true"
-                 - key: node-role.kubernetes.io/worker
-                   operator: In
-                   values:
-                     - "true"
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
-                 - key: hardware
+                 - key: kubernetes.io/hostname
                    operator: In
                    values:
-                     - rpi5
+                     - titan-22
            - weight: 80
+             preference:
+               matchExpressions:
+                 - key: kubernetes.io/hostname
+                   operator: In
+                   values:
+                     - titan-20
+                     - titan-21
+           - weight: 60
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
@ -108,6 +105,7 @@ spec:
        fsGroup: 65532
        fsGroupChangePolicy: OnRootMismatch
        runAsGroup: 65532
+     runtimeClassName: nvidia
      containers:
        - name: jellyfin
          image: docker.io/jellyfin/jellyfin:10.11.5
@ -120,6 +118,8 @@ spec:
            - name: http
              containerPort: 8096
          env:
+           - name: NVIDIA_DRIVER_CAPABILITIES
+             value: "compute,video,utility"
            - name: JELLYFIN_PublishedServerUrl
              value: "https://stream.bstein.dev"
            - name: PUID
@ -131,7 +131,12 @@ spec:
            - name: VAULT_COPY_FILES
              value: /vault/secrets/ldap-config.xml:/config/plugins/configurations/LDAP-Auth.xml
          resources:
+           limits:
+             nvidia.com/gpu.shared: 1
+             # cpu: "4"
+             # memory: 8Gi
            requests:
+             nvidia.com/gpu.shared: 1
              cpu: "500m"
              memory: 1Gi
          volumeMounts:

services/jellyfin/oidc/Jenkinsfile (new file, vendored, 568 lines)
View File

@ -0,0 +1,568 @@
pipeline {
agent {
kubernetes {
yaml """
apiVersion: v1
kind: Pod
spec:
restartPolicy: Never
containers:
- name: dotnet
image: mcr.microsoft.com/dotnet/sdk:9.0
command:
- cat
tty: true
"""
}
}
options {
timestamps()
}
parameters {
string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/oidc-plugin', description: 'OCI repository for the plugin artifact')
string(name: 'JELLYFIN_VERSION', defaultValue: '10.11.5', description: 'Jellyfin version to tag the plugin with')
string(name: 'PLUGIN_VERSION', defaultValue: '1.0.2.0', description: 'Plugin version')
}
environment {
ORAS_VERSION = "1.2.0"
DOTNET_CLI_TELEMETRY_OPTOUT = "1"
DOTNET_SKIP_FIRST_TIME_EXPERIENCE = "1"
}
stages {
stage('Checkout') {
steps {
container('dotnet') {
checkout scm
}
}
}
stage('Build plugin') {
steps {
container('dotnet') {
sh '''
set -euo pipefail
apt-get update
apt-get install -y --no-install-recommends zip curl ca-certificates git
WORKDIR="$(pwd)/build"
SRC_DIR="${WORKDIR}/src"
DIST_DIR="${WORKDIR}/dist"
ART_DIR="${WORKDIR}/artifact"
rm -rf "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}"
mkdir -p "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}"
git clone https://github.com/lolerskatez/JellyfinOIDCPlugin.git "${SRC_DIR}"
cd "${SRC_DIR}"
# Override controllers to avoid DI version issues and add injection script
cat > Controllers/OidcController.cs <<'EOF'
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using IdentityModel.OidcClient;
using MediaBrowser.Controller.Library;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.DependencyInjection;
namespace JellyfinOIDCPlugin.Controllers;
#nullable enable
[ApiController]
[Route("api/oidc")]
public class OidcController : ControllerBase
{
private IUserManager UserManager => HttpContext.RequestServices.GetRequiredService<IUserManager>();
private static readonly Dictionary<string, object> StateManager = new(); // Store AuthorizeState objects
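    // Populated in Start() and consumed (then removed) in Callback(); the map lives in process memory only,
    // so a restart invalidates in-flight logins.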
[HttpGet("start")]
public async Task<IActionResult> Start()
{
var config = Plugin.Instance?.Configuration;
if (config == null)
{
return BadRequest("Plugin not initialized");
}
var options = new OidcClientOptions
{
Authority = config.OidEndpoint?.Trim(),
ClientId = config.OidClientId?.Trim(),
ClientSecret = config.OidSecret?.Trim(),
RedirectUri = GetRedirectUri(),
Scope = string.Join(" ", config.OidScopes)
};
try
{
var client = new OidcClient(options);
var result = await client.PrepareLoginAsync().ConfigureAwait(false);
// Store the authorize state for the callback
var stateString = (string)result.GetType().GetProperty("State")?.GetValue(result);
if (!string.IsNullOrEmpty(stateString))
{
StateManager[stateString] = result;
}
var startUrl = (string)result.GetType().GetProperty("StartUrl")?.GetValue(result);
if (string.IsNullOrEmpty(startUrl))
{
Console.WriteLine("OIDC: Could not get StartUrl from OIDC result");
return BadRequest("OIDC initialization failed");
}
return Redirect(startUrl);
}
catch (Exception ex)
{
Console.WriteLine($"OIDC start error: {ex}");
return BadRequest("OIDC error: " + ex.Message);
}
}
[HttpGet("callback")]
public async Task<IActionResult> Callback()
{
var config = Plugin.Instance?.Configuration;
if (config == null)
{
return BadRequest("Plugin not initialized");
}
try
{
var stateParam = Request.Query["state"].ToString();
if (string.IsNullOrEmpty(stateParam) || !StateManager.TryGetValue(stateParam, out var storedState))
{
Console.WriteLine($"OIDC: Invalid state {stateParam}");
return BadRequest("Invalid state");
}
var options = new OidcClientOptions
{
Authority = config.OidEndpoint?.Trim(),
ClientId = config.OidClientId?.Trim(),
ClientSecret = config.OidSecret?.Trim(),
RedirectUri = GetRedirectUri(),
Scope = string.Join(" ", config.OidScopes)
};
var client = new OidcClient(options);
// Cast stored state to AuthorizeState - it's stored as object
var authorizeState = (AuthorizeState)storedState;
var result = await client.ProcessResponseAsync(Request.QueryString.Value, authorizeState).ConfigureAwait(false);
if (result.IsError)
{
Console.WriteLine($"OIDC callback failed: {result.Error} - {result.ErrorDescription}");
return BadRequest("OIDC authentication failed");
}
// Get email from claims
var email = result.User?.FindFirst("email")?.Value ??
result.User?.FindFirst("preferred_username")?.Value ??
result.User?.FindFirst("sub")?.Value;
if (string.IsNullOrEmpty(email))
{
Console.WriteLine("OIDC: No email/username found in OIDC response");
return BadRequest("No email/username found in OIDC response");
}
// Get or create user
var user = UserManager.GetUserByName(email);
if (user == null)
{
Console.WriteLine($"OIDC: Creating new user {email}");
user = await UserManager.CreateUserAsync(email).ConfigureAwait(false);
}
// Set authentication provider
user.AuthenticationProviderId = "OIDC";
// Get roles from claims
var rolesClaimValue = result.User?.FindFirst(config.RoleClaim)?.Value;
var roles = string.IsNullOrEmpty(rolesClaimValue)
? Array.Empty<string>()
: rolesClaimValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Set permissions based on groups
var isAdmin = roles.Any(r => r.Equals("admin", StringComparison.OrdinalIgnoreCase));
var isPowerUser = roles.Any(r => r.Equals("Power User", StringComparison.OrdinalIgnoreCase)) && !isAdmin;
Console.WriteLine($"OIDC: User {email} authenticated. Admin: {isAdmin}, PowerUser: {isPowerUser}");
// Update user in database
await UserManager.UpdateUserAsync(user).ConfigureAwait(false);
StateManager.Remove(stateParam);
// Redirect to Jellyfin main page
return Redirect("/");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC callback error: {ex}");
return BadRequest("OIDC error: " + ex.Message);
}
}
[HttpPost("token")]
public async Task<IActionResult> ExchangeToken([FromBody] TokenExchangeRequest request)
{
var config = Plugin.Instance?.Configuration;
if (config == null)
{
Console.WriteLine("OIDC: Plugin not initialized");
return BadRequest("Plugin not initialized");
}
if (string.IsNullOrEmpty(request?.AccessToken))
{
Console.WriteLine("OIDC: No access token provided");
return BadRequest("Access token is required");
}
try
{
Console.WriteLine("OIDC: Processing token exchange request");
// Validate the token with the OIDC provider using UserInfo endpoint
var options = new OidcClientOptions
{
Authority = config.OidEndpoint?.Trim(),
ClientId = config.OidClientId?.Trim(),
ClientSecret = config.OidSecret?.Trim(),
Scope = string.Join(" ", config.OidScopes)
};
var client = new OidcClient(options);
// Use the access token to get user info
var userInfoResult = await client.GetUserInfoAsync(request.AccessToken).ConfigureAwait(false);
if (userInfoResult.IsError)
{
Console.WriteLine($"OIDC: Failed to get user info: {userInfoResult.Error}");
return Unauthorized("Invalid access token");
}
// Extract email/username from user info
var email = userInfoResult.Claims.FirstOrDefault(c => c.Type == "email")?.Value ??
userInfoResult.Claims.FirstOrDefault(c => c.Type == "preferred_username")?.Value ??
userInfoResult.Claims.FirstOrDefault(c => c.Type == "sub")?.Value;
if (string.IsNullOrEmpty(email))
{
Console.WriteLine("OIDC: No email/username found in token");
return BadRequest("No email/username found in token");
}
// Get or create user
var user = UserManager.GetUserByName(email);
if (user == null)
{
if (!config.AutoCreateUser)
{
Console.WriteLine($"OIDC: User {email} not found and auto-create disabled");
return Unauthorized("User does not exist and auto-creation is disabled");
}
Console.WriteLine($"OIDC: Creating new user from token {email}");
user = await UserManager.CreateUserAsync(email).ConfigureAwait(false);
}
// Update user authentication provider
user.AuthenticationProviderId = "OIDC";
// Get roles from claims
var rolesClaimName = config.RoleClaim ?? "groups";
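            // Falls back to the conventional "groups" claim when the plugin configuration does not name a role claim.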
var rolesClaimValue = userInfoResult.Claims.FirstOrDefault(c => c.Type == rolesClaimName)?.Value;
var roles = string.IsNullOrEmpty(rolesClaimValue)
? Array.Empty<string>()
: rolesClaimValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
// Set permissions based on groups
var isAdmin = roles.Any(r => r.Equals("admin", StringComparison.OrdinalIgnoreCase));
var isPowerUser = roles.Any(r => r.Equals("Power User", StringComparison.OrdinalIgnoreCase)) && !isAdmin;
Console.WriteLine($"OIDC: Token exchange for {email} Admin:{isAdmin} Power:{isPowerUser}");
// Update user in database
await UserManager.UpdateUserAsync(user).ConfigureAwait(false);
// Return success with user info
return Ok(new TokenExchangeResponse
{
Success = true,
UserId = user.Id.ToString(),
Username = user.Username,
Email = email,
IsAdmin = isAdmin,
Message = "User authenticated successfully"
});
}
catch (Exception ex)
{
Console.WriteLine($"OIDC token exchange error: {ex}");
return StatusCode(500, $"Token exchange failed: {ex.Message}");
}
}
private string GetRedirectUri()
{
var configured = Plugin.Instance?.Configuration?.RedirectUri;
if (!string.IsNullOrWhiteSpace(configured))
{
return configured!;
}
return $"{Request.Scheme}://{Request.Host}/api/oidc/callback";
}
}
public class TokenExchangeRequest
{
public string? AccessToken { get; set; }
public string? IdToken { get; set; }
}
public class TokenExchangeResponse
{
public bool Success { get; set; }
public string? UserId { get; set; }
public string? Username { get; set; }
public string? Email { get; set; }
public bool IsAdmin { get; set; }
public string? Message { get; set; }
}
EOF
cat > Controllers/OidcStaticController.cs <<'EOF'
using System;
using System.IO;
using System.Reflection;
using MediaBrowser.Common.Plugins;
using Microsoft.AspNetCore.Mvc;
namespace JellyfinOIDCPlugin.Controllers;
[ApiController]
[Route("api/oidc")]
public class OidcStaticController : ControllerBase
{
[HttpGet("login.js")]
public IActionResult GetLoginScript()
{
try
{
var assembly = Assembly.GetExecutingAssembly();
using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-login.js");
if (stream == null)
{
Console.WriteLine("OIDC: Login script resource not found");
return NotFound();
}
using var reader = new StreamReader(stream);
var content = reader.ReadToEnd();
return Content(content, "application/javascript");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC: Error serving login script {ex}");
return StatusCode(500, "Error loading login script");
}
}
[HttpGet("loader.js")]
public IActionResult GetLoader()
{
try
{
var assembly = Assembly.GetExecutingAssembly();
using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-loader.js");
if (stream == null)
{
Console.WriteLine("OIDC: Loader script resource not found");
return NotFound();
}
using var reader = new StreamReader(stream);
var content = reader.ReadToEnd();
return Content(content, "application/javascript");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC: Error serving loader script {ex}");
return StatusCode(500, "Error loading loader script");
}
}
[HttpGet("inject")]
public IActionResult GetInject()
{
try
{
var script = @"
(function() {
console.log('[OIDC Plugin] Bootstrap inject started');
// Load oidc-loader.js dynamically
const loaderScript = document.createElement('script');
loaderScript.src = '/api/oidc/loader.js';
loaderScript.type = 'application/javascript';
loaderScript.onerror = function() {
console.error('[OIDC Plugin] Failed to load loader.js');
};
loaderScript.onload = function() {
console.log('[OIDC Plugin] Loader.js loaded successfully');
};
// Append to head or body
const target = document.head || document.documentElement;
target.appendChild(loaderScript);
console.log('[OIDC Plugin] Bootstrap script appended to page');
})();
";
return Content(script, "application/javascript");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC: Error serving inject script {ex}");
return StatusCode(500, "Error loading inject script");
}
}
[HttpGet("global.js")]
public IActionResult GetGlobalInjector()
{
try
{
var assembly = Assembly.GetExecutingAssembly();
using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.oidc-global-injector.js");
if (stream == null)
{
Console.WriteLine("OIDC: Global injector resource not found");
return NotFound();
}
using var reader = new StreamReader(stream);
var content = reader.ReadToEnd();
return Content(content, "application/javascript");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC: Error serving global injector {ex}");
return StatusCode(500, "Error loading global injector");
}
}
[HttpGet("config")]
public IActionResult GetConfigurationPage()
{
try
{
var assembly = Assembly.GetExecutingAssembly();
using var stream = assembly.GetManifestResourceStream("JellyfinOIDCPlugin.web.configurationpage.html");
if (stream == null)
{
Console.WriteLine("OIDC: Configuration page resource not found");
return NotFound("Configuration page resource not found");
}
using var reader = new StreamReader(stream);
var content = reader.ReadToEnd();
return Content(content, "text/html");
}
catch (Exception ex)
{
Console.WriteLine($"OIDC: Error serving configuration page {ex}");
return StatusCode(500, $"Error loading configuration page: {ex.Message}");
}
}
}
EOF
cat > JellyfinOIDCPlugin.csproj <<'EOF'
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<AssemblyName>JellyfinOIDCPlugin.v2</AssemblyName>
<RootNamespace>JellyfinOIDCPlugin</RootNamespace>
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<AssemblyVersion>1.0.2.0</AssemblyVersion>
<FileVersion>1.0.2.0</FileVersion>
<CopyLocalLockFileAssemblies>false</CopyLocalLockFileAssemblies>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Jellyfin.Controller" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Model" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Common" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Data" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Database.Implementations" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="IdentityModel.OidcClient" Version="5.2.1">
<PrivateAssets>none</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.11">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="web\\*.html" />
<EmbeddedResource Include="web\\*.js" />
<EmbeddedResource Include="web\\*.css" />
</ItemGroup>
</Project>
EOF
dotnet restore
dotnet publish -c Release --no-self-contained -o "${DIST_DIR}"
cd "${DIST_DIR}"
zip -r "${ART_DIR}/OIDC_Authentication_${PLUGIN_VERSION}-net9.zip" .
'''
}
}
}
stage('Push to Harbor') {
steps {
container('dotnet') {
withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
sh '''
set -euo pipefail
WORKDIR="$(pwd)/build"
ORAS_BIN="/usr/local/bin/oras"
curl -sSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin oras
ref_host="$(echo "${HARBOR_REPO}" | cut -d/ -f1)"
"${ORAS_BIN}" login "${ref_host}" -u "${HARBOR_USERNAME}" -p "${HARBOR_PASSWORD}"
artifact="${WORKDIR}/artifact/OIDC_Authentication_${PLUGIN_VERSION}-net9.zip"
"${ORAS_BIN}" push "${HARBOR_REPO}:${JELLYFIN_VERSION}" "${artifact}:application/zip" --artifact-type application/zip
"${ORAS_BIN}" push "${HARBOR_REPO}:latest" "${artifact}:application/zip" --artifact-type application/zip
'''
}
}
}
}
}
post {
always {
container('dotnet') {
archiveArtifacts artifacts: 'build/artifact/*.zip', allowEmptyArchive: true
}
}
}
}

View File

@ -45,17 +45,6 @@ data:
              username: "${HARBOR_ROBOT_USERNAME}"
              password: "${HARBOR_ROBOT_PASSWORD}"
              description: "Harbor robot for pipelines"
-         - usernamePassword:
-             scope: GLOBAL
-             id: harbor-robot-streaming
-             username: "${HARBOR_STREAMING_ROBOT_USERNAME}"
-             password: "${HARBOR_STREAMING_ROBOT_PASSWORD}"
-             description: "Harbor robot for streaming pushes"
-         - string:
-             scope: GLOBAL
-             id: sonarqube-token
-             secret: "${SONARQUBE_TOKEN}"
-             description: "SonarQube token for quality-gate evidence collection"
  jobs.yaml: |
    jobs:
      - script: |
@ -214,32 +203,6 @@ data:
              }
            }
          }
-         pipelineJob('arcanagon') {
-           properties {
-             pipelineTriggers {
-               triggers {
-                 scmTrigger {
-                   scmpoll_spec('H/5 * * * *')
-                   ignorePostCommitHooks(false)
-                 }
-               }
-             }
-           }
-           definition {
-             cpsScm {
-               scm {
-                 git {
-                   remote {
-                     url('https://scm.bstein.dev/bstein/arcanagon.git')
-                     credentials('gitea-pat')
-                   }
-                   branches('*/master')
-                 }
-               }
-               scriptPath('Jenkinsfile')
-             }
-           }
-         }
          pipelineJob('pegasus') {
            properties {
              pipelineTriggers {
@ -266,58 +229,6 @@ data:
              }
            }
          }
-         pipelineJob('atlasbot') {
-           properties {
-             pipelineTriggers {
-               triggers {
-                 scmTrigger {
-                   scmpoll_spec('H/5 * * * *')
-                   ignorePostCommitHooks(false)
-                 }
-               }
-             }
-           }
-           definition {
-             cpsScm {
-               scm {
-                 git {
-                   remote {
-                     url('https://scm.bstein.dev/bstein/atlasbot.git')
-                     credentials('gitea-pat')
-                   }
-                   branches('*/main')
-                 }
-               }
-               scriptPath('Jenkinsfile')
-             }
-           }
-         }
-         pipelineJob('soteria') {
-           properties {
-             pipelineTriggers {
-               triggers {
-                 scmTrigger {
-                   scmpoll_spec('H/5 * * * *')
-                   ignorePostCommitHooks(false)
-                 }
-               }
-             }
-           }
-           definition {
-             cpsScm {
-               scm {
-                 git {
-                   remote {
-                     url('https://scm.bstein.dev/bstein/soteria.git')
-                     credentials('gitea-pat')
-                   }
-                   branches('*/main')
-                 }
-               }
-               scriptPath('Jenkinsfile')
-             }
-           }
-         }
          pipelineJob('data-prepper') {
            properties {
              pipelineTriggers {
@ -337,65 +248,13 @@ data:
                      url('https://scm.bstein.dev/bstein/titan-iac.git')
                      credentials('gitea-pat')
                    }
-                   branches('*/main')
+                   branches('*/feature/sso-hardening')
                  }
                }
                scriptPath('services/logging/Jenkinsfile.data-prepper')
              }
            }
          }
-         pipelineJob('titan-iac') {
-           properties {
-             pipelineTriggers {
-               triggers {
-                 scmTrigger {
-                   scmpoll_spec('H/5 * * * *')
-                   ignorePostCommitHooks(false)
-                 }
-               }
-             }
-           }
-           definition {
-             cpsScm {
-               scm {
-                 git {
-                   remote {
-                     url('https://scm.bstein.dev/bstein/titan-iac.git')
-                     credentials('gitea-pat')
-                   }
-                   branches('*/main')
-                 }
-               }
-               scriptPath('Jenkinsfile')
-             }
-           }
-         }
-         pipelineJob('typhon') {
-           properties {
-             pipelineTriggers {
-               triggers {
-                 scmTrigger {
-                   scmpoll_spec('H/5 * * * *')
-                   ignorePostCommitHooks(false)
-                 }
-               }
-             }
-           }
-           definition {
-             cpsScm {
-               scm {
-                 git {
-                   remote {
-                     url('https://scm.bstein.dev/bstein/typhon.git')
-                     credentials('gitea-pat')
-                   }
-                   branches('*/main')
-                 }
-               }
-               scriptPath('Jenkinsfile')
-             }
-           }
-         }
          multibranchPipelineJob('titan-iac-quality-gate') {
            branchSources {
              branchSource {
@ -488,40 +347,6 @@ data:
          podRetention: Never
          serviceAccount: "jenkins"
          slaveConnectTimeoutStr: "100"
-         yaml: |
-           spec:
-             affinity:
-               nodeAffinity:
-                 preferredDuringSchedulingIgnoredDuringExecution:
-                   - weight: 100
-                     preference:
-                       matchExpressions:
-                         - key: atlas.bstein.dev/spillover
-                           operator: DoesNotExist
-                   - weight: 95
-                     preference:
-                       matchExpressions:
-                         - key: kubernetes.io/hostname
-                           operator: NotIn
-                           values:
-                             - titan-13
-                             - titan-15
-                             - titan-17
-                             - titan-19
-                   - weight: 85
-                     preference:
-                       matchExpressions:
-                         - key: hardware
-                           operator: In
-                           values:
-                             - rpi5
-             topologySpreadConstraints:
-               - maxSkew: 1
-                 topologyKey: kubernetes.io/hostname
-                 whenUnsatisfiable: ScheduleAnyway
-                 labelSelector:
-                   matchLabels:
-                     jenkins/jenkins-jenkins-agent: "true"
          yamlMergeStrategy: override
          inheritYamlMergeStrategy: false
          slaveAgentPort: 50000

View File

@ -33,35 +33,22 @@ spec:
          {{ with secret "kv/data/atlas/jenkins/harbor-robot-creds" }}
          HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
          HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
-         HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
-         HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
-         {{ end }}
-         {{ with secret "kv/data/atlas/jenkins/harbor-streaming-robot-creds" }}
-         HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
-         HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
          {{ end }}
          {{ with secret "kv/data/atlas/shared/harbor-pull" }}
          {{- if and .Data.data.username .Data.data.password }}
-         HARBOR_PULL_USERNAME={{ .Data.data.username }}
-         HARBOR_PULL_PASSWORD={{ .Data.data.password }}
+         HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
+         HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
          {{- end }}
          {{ end }}
          {{ with secret "kv/data/atlas/jenkins/gitea-pat" }}
          GITEA_PAT_USERNAME={{ .Data.data.username }}
          GITEA_PAT_TOKEN={{ .Data.data.token }}
          {{ end }}
-         {{ with secret "kv/data/atlas/quality/sonarqube-oidc" }}
-         SONARQUBE_TOKEN={{ .Data.data.sonarqube_exporter_token }}
-         {{ end }}
          {{ with secret "kv/data/atlas/jenkins/webhook-tokens" }}
          TITAN_IAC_WEBHOOK_TOKEN={{ .Data.data.titan_iac_quality_gate }}
          GIT_NOTIFY_TOKEN_BSTEIN_DEV_HOME={{ .Data.data.git_notify_bstein_dev_home }}
          {{ end }}
-         {{ with secret "kv/data/atlas/jenkins/ariadne-api" }}
-         ARIADNE_JENKINS_API_USER={{ .Data.data.username }}
-         ARIADNE_JENKINS_API_TOKEN={{ .Data.data.token }}
-         {{ end }}
-       bstein.dev/restarted-at: "2026-04-13T06:35:00Z"
+       bstein.dev/restarted-at: "2026-02-02T15:10:33Z"
    spec:
      serviceAccountName: jenkins
      nodeSelector:
@ -70,21 +57,6 @@ spec:
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
-           - weight: 100
-             preference:
-               matchExpressions:
-                 - key: atlas.bstein.dev/spillover
-                   operator: DoesNotExist
-           - weight: 95
-             preference:
-               matchExpressions:
-                 - key: kubernetes.io/hostname
-                   operator: NotIn
-                   values:
-                     - titan-13
-                     - titan-15
-                     - titan-17
-                     - titan-19
            - weight: 90
              preference:
                matchExpressions:
@ -103,7 +75,6 @@ spec:
            - sso.bstein.dev
      securityContext:
        fsGroup: 1000
-       fsGroupChangePolicy: OnRootMismatch
      initContainers:
        - name: install-plugins
          image: jenkins/jenkins:2.528.3-jdk21
@ -180,8 +151,7 @@ spec:
              port: http
            initialDelaySeconds: 30
            periodSeconds: 10
-           timeoutSeconds: 5
-           failureThreshold: 60
+           failureThreshold: 20
          volumeMounts:
            - name: jenkins-home
              mountPath: /var/jenkins_home

View File

@ -22,7 +22,6 @@ configMapGenerator:
  - name: jenkins-init-scripts
    namespace: jenkins
    files:
-     - ariadne-api-user.groovy=scripts/ariadne-api-user.groovy
      - git-notify-token.groovy=scripts/git-notify-token.groovy
      - theme.groovy=scripts/theme.groovy
    options:

View File

@ -1,96 +0,0 @@
import hudson.model.User
import jenkins.security.ApiTokenProperty
def userId = (System.getenv("ARIADNE_JENKINS_API_USER") ?: "").trim()
def envTokenValue = (System.getenv("ARIADNE_JENKINS_API_TOKEN") ?: "").trim()
def tokenName = "ariadne-weather"
def tokenFile = new File("/var/jenkins_home/secrets/ariadne-api-token")
def userFile = new File("/var/jenkins_home/secrets/ariadne-api-user")
def persistedTokenValue = tokenFile.exists() ? (tokenFile.text ?: "").trim() : ""
def tokenValue = envTokenValue ?: persistedTokenValue
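// Prefer the token injected via the environment; otherwise reuse the token persisted under /var/jenkins_home/secrets from a previous boot.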
if (!userId || !tokenValue) {
println("Ariadne API user bootstrap skipped: missing ARIADNE_JENKINS_API_USER and no token source available")
return
}
def user = User.getById(userId, true)
if (user == null) {
println("Ariadne API user bootstrap failed: unable to resolve user ${userId}")
return
}
if (!user.getFullName() || user.getFullName().trim() == userId) {
user.setFullName("Ariadne Metrics")
}
def prop = user.getProperty(ApiTokenProperty.class)
if (prop == null) {
prop = new ApiTokenProperty()
user.addProperty(prop)
}
if (persistedTokenValue && prop.matchesPassword(persistedTokenValue)) {
tokenValue = persistedTokenValue
}
if (!prop.matchesPassword(tokenValue)) {
def store = prop.getTokenStore()
boolean configured = false
try {
def existing = store.getTokenListSortedByName().find { token ->
try {
token.getName() == tokenName
} catch (Throwable ignored) {
false
}
}
if (existing != null) {
try {
store.revokeToken(existing.getUuid())
} catch (Throwable ignored) {
try {
store.revokeToken(existing.uuid)
} catch (Throwable ignoredAgain) {
println("Ariadne API user bootstrap warning: failed to revoke existing token ${tokenName}")
}
}
}
store.addFixedNewToken(tokenName, tokenValue)
configured = true
} catch (Throwable ignored) {
// Fallback for older token-store variants.
}
if (!configured) {
if (persistedTokenValue && prop.matchesPassword(persistedTokenValue)) {
tokenValue = persistedTokenValue
} else {
def generated = store.generateNewToken(tokenName)
if (generated?.plainValue) {
tokenValue = generated.plainValue
}
println("Ariadne API user bootstrap warning: addFixedNewToken unavailable, generated replacement token")
}
}
}
tokenFile.parentFile?.mkdirs()
tokenFile.text = tokenValue + "\n"
tokenFile.setReadable(false, false)
tokenFile.setReadable(true, true)
tokenFile.setWritable(false, false)
tokenFile.setWritable(true, true)
userFile.parentFile?.mkdirs()
userFile.text = userId + "\n"
userFile.setReadable(false, false)
userFile.setReadable(true, true)
userFile.setWritable(false, false)
userFile.setWritable(true, true)
user.save()
println("Ariadne API user bootstrap complete for ${userId}")

View File

@ -35,38 +35,7 @@ subjects:
  - kind: ServiceAccount
    name: jenkins
    namespace: jenkins
- - kind: ServiceAccount
-   name: default
-   namespace: jenkins
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: jenkins-agent
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRole
- metadata:
-   name: jenkins-glue-observer
- rules:
-   - apiGroups: ["batch"]
-     resources:
-       - cronjobs
-     verbs: ["get", "list", "watch"]
- ---
- apiVersion: rbac.authorization.k8s.io/v1
- kind: ClusterRoleBinding
- metadata:
-   name: jenkins-glue-observer
- subjects:
-   - kind: ServiceAccount
-     name: jenkins
-     namespace: jenkins
-   - kind: ServiceAccount
-     name: default
-     namespace: jenkins
- roleRef:
-   apiGroup: rbac.authorization.k8s.io
-   kind: ClusterRole
-   name: jenkins-glue-observer

View File

@ -18,15 +18,6 @@ spec:
    nodeSelector:
      kubernetes.io/arch: arm64
      node-role.kubernetes.io/worker: "true"
-   affinity:
-     nodeAffinity:
-       preferredDuringSchedulingIgnoredDuringExecution:
-         - weight: 100
-           preference:
-             matchExpressions:
-               - key: kubernetes.io/hostname
-                 operator: NotIn
-                 values: ["titan-13", "titan-15", "titan-17", "titan-19"]
    containers:
      - name: sync
        image: alpine:3.20

View File

@ -24,9 +24,7 @@ resources:
  - oneoffs/logs-oidc-secret-ensure-job.yaml
  - oneoffs/metis-oidc-secret-ensure-job.yaml
  - oneoffs/soteria-oidc-secret-ensure-job.yaml
- - oneoffs/quality-oidc-secret-ensure-job.yaml
  - oneoffs/metis-ssh-keys-secret-ensure-job.yaml
- - oneoffs/metis-node-passwords-secret-ensure-job.yaml
  - oneoffs/harbor-oidc-secret-ensure-job.yaml
  - oneoffs/vault-oidc-secret-ensure-job.yaml
  - oneoffs/actual-oidc-secret-ensure-job.yaml

View File

@ -1,110 +0,0 @@
# services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
# One-off job for sso/metis-node-passwords-secret-ensure-4.
# Purpose: ensure per-node Metis recovery placeholders exist in Vault.
# Atlas/root values are preserved while intranet IPs are standardized per node.
apiVersion: batch/v1
kind: Job
metadata:
name: metis-node-passwords-secret-ensure-4
namespace: sso
spec:
backoffLimit: 0
ttlSecondsAfterFinished: 3600
template:
spec:
serviceAccountName: mas-secrets-ensure
restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
containers:
- name: apply
image: registry.bstein.dev/bstein/kubectl:1.35.0
command: ["/bin/sh", "-c"]
args:
- |
set -eu
vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
vault_role="${VAULT_ROLE:-sso-secrets}"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
vault_token="$(curl -sS --request POST --data "${login_payload}" "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
echo "vault login failed" >&2
exit 1
fi
ensured=0
while read -r node intranet_ip; do
if [ -z "${node}" ] || [ -z "${intranet_ip}" ]; then
continue
fi
secret_path="kv/data/atlas/nodes/${node}"
read_status="$(curl -sS -o /tmp/node-read.json -w "%{http_code}" -H "X-Vault-Token: ${vault_token}" "${vault_addr}/v1/${secret_path}" || true)"
if [ "${read_status}" = "200" ]; then
atlas_password="$(jq -r '.data.data.atlas_password // empty' /tmp/node-read.json)"
root_password="$(jq -r '.data.data.root_password // empty' /tmp/node-read.json)"
elif [ "${read_status}" = "404" ]; then
atlas_password=""
root_password=""
else
echo "Vault read failed for ${node} (status ${read_status})" >&2
cat /tmp/node-read.json >&2 || true
exit 1
fi
payload="$(jq -nc --arg atlas_password "${atlas_password}" --arg root_password "${root_password}" --arg intranet_ip "${intranet_ip}" '{data:{atlas_password:$atlas_password,root_password:$root_password,intranet_ip:$intranet_ip}}')"
write_status="$(curl -sS -o /tmp/node-write.json -w "%{http_code}" -X POST -H "X-Vault-Token: ${vault_token}" -H 'Content-Type: application/json' -d "${payload}" "${vault_addr}/v1/${secret_path}")"
if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
echo "Vault write failed for ${node} (status ${write_status})" >&2
cat /tmp/node-write.json >&2 || true
exit 1
fi
ensured=$((ensured + 1))
echo "Ensured node secret placeholder for ${node} (${intranet_ip})"
done <<'EOF_NODES'
titan-jh 192.168.22.8
titan-db 192.168.22.10
titan-0a 192.168.22.11
titan-0b 192.168.22.12
titan-0c 192.168.22.13
titan-20 192.168.22.20
titan-21 192.168.22.21
titan-22 192.168.22.22
titan-23 192.168.22.23
titan-24 192.168.22.26
titan-04 192.168.22.30
titan-05 192.168.22.31
titan-06 192.168.22.32
titan-07 192.168.22.33
titan-08 192.168.22.34
titan-09 192.168.22.35
titan-10 192.168.22.36
titan-11 192.168.22.37
titan-12 192.168.22.40
titan-13 192.168.22.41
titan-14 192.168.22.42
titan-15 192.168.22.43
titan-16 192.168.22.44
titan-17 192.168.22.45
titan-18 192.168.22.46
titan-19 192.168.22.47
EOF_NODES
echo "Ensured ${ensured} Metis node placeholders in Vault"

View File

@ -73,7 +73,7 @@ spec:
            CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
            if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
-             create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
+             create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
              status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                -H 'Content-Type: application/json' \
@ -121,7 +121,7 @@ spec:
              fi
            fi
-           update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
+           update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
            status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
              -H "Authorization: Bearer ${ACCESS_TOKEN}" \
              -H 'Content-Type: application/json' \

View File

@ -1,198 +0,0 @@
# services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml
# One-off job for sso/quality-oidc-secret-ensure-1.
# Purpose: ensure the SonarQube oauth2-proxy OIDC client and Vault secret exist.
# Keep this completed Job around; bump the suffix if it ever needs to be rerun.
apiVersion: batch/v1
kind: Job
metadata:
name: quality-oidc-secret-ensure-1
namespace: sso
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "sso-secrets"
vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
{{ with secret "kv/data/atlas/shared/keycloak-admin" }}
export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
{{ end }}
spec:
serviceAccountName: mas-secrets-ensure
restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
containers:
- name: apply
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/bin/sh", "-c"]
args:
- |
set -euo pipefail
. /vault/secrets/keycloak-admin-env.sh
KC_URL="http://keycloak.sso.svc.cluster.local"
ACCESS_TOKEN=""
for attempt in 1 2 3 4 5; do
TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \
-H 'Content-Type: application/x-www-form-urlencoded' \
-d "grant_type=password" \
-d "client_id=admin-cli" \
-d "username=${KEYCLOAK_ADMIN}" \
-d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)"
ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)"
if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then
break
fi
echo "Keycloak token request failed (attempt ${attempt})" >&2
sleep $((attempt * 2))
done
if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
echo "Failed to fetch Keycloak admin token" >&2
exit 1
fi
CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
create_payload='{"clientId":"sonarqube","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H 'Content-Type: application/json' \
-d "${create_payload}" \
"$KC_URL/admin/realms/atlas/clients")"
if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then
echo "Keycloak client create failed (status ${status})" >&2
exit 1
fi
CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
fi
if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
echo "Keycloak client sonarqube not found" >&2
exit 1
fi
SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)"
if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
echo "Keycloak client scope groups not found" >&2
exit 1
fi
DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)"
OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)"
if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \
&& ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then
status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
echo "Failed to attach groups client scope to sonarqube (status ${status})" >&2
exit 1
fi
fi
fi
update_payload='{"enabled":true,"clientId":"sonarqube","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H 'Content-Type: application/json' \
-d "${update_payload}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")"
if [ "$status" != "204" ]; then
echo "Keycloak client update failed (status ${status})" >&2
exit 1
fi
CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)"
if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then
echo "Keycloak client secret not found" >&2
exit 1
fi
vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
vault_role="${VAULT_ROLE:-sso-secrets}"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
vault_token="$(curl -sS --request POST --data "${login_payload}" \
"${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
echo "vault login failed" >&2
exit 1
fi
read_status="$(curl -sS -o /tmp/sonarqube-oidc-read.json -w "%{http_code}" \
-H "X-Vault-Token: ${vault_token}" \
"${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
COOKIE_SECRET=""
if [ "${read_status}" = "200" ]; then
COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/sonarqube-oidc-read.json)"
elif [ "${read_status}" != "404" ]; then
echo "Vault read failed (status ${read_status})" >&2
cat /tmp/sonarqube-oidc-read.json >&2 || true
exit 1
fi
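# oauth2-proxy accepts only 16-, 24-, or 32-byte cookie secrets; reuse the stored value
# only when its length is valid, otherwise mint a new one below.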
if [ -n "${COOKIE_SECRET}" ]; then
length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')"
if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then
COOKIE_SECRET=""
fi
fi
if [ -z "${COOKIE_SECRET}" ]; then
COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')"
fi
payload="$(jq -nc \
--arg client_id "sonarqube" \
--arg client_secret "${CLIENT_SECRET}" \
--arg cookie_secret "${COOKIE_SECRET}" \
'{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')"
write_status="$(curl -sS -o /tmp/sonarqube-oidc-write.json -w "%{http_code}" -X POST \
-H "X-Vault-Token: ${vault_token}" \
-H 'Content-Type: application/json' \
-d "${payload}" "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc")"
if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
echo "Vault write failed (status ${write_status})" >&2
cat /tmp/sonarqube-oidc-write.json >&2 || true
exit 1
fi
verify_status="$(curl -sS -o /tmp/sonarqube-oidc-verify.json -w "%{http_code}" \
-H "X-Vault-Token: ${vault_token}" \
"${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
if [ "${verify_status}" != "200" ]; then
echo "Vault verify failed (status ${verify_status})" >&2
cat /tmp/sonarqube-oidc-verify.json >&2 || true
exit 1
fi
echo "SonarQube OIDC secret ready in Vault"

View File

@ -8,6 +8,7 @@ spec:
      restartPolicy: Never
      serviceAccountName: jenkins
      nodeSelector:
+       hardware: rpi5
        node-role.kubernetes.io/worker: "true"
      containers:
        - name: git
@ -15,11 +16,6 @@ spec:
          command:
            - cat
          tty: true
-       - name: quality-tools
-         image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
-         command:
-           - cat
-         tty: true
        - name: kaniko
          image: gcr.io/kaniko-project/executor:v1.23.2-debug
          command:
@ -27,7 +23,7 @@ spec:
          tty: true
          resources:
            requests:
-             cpu: "100m"
+             cpu: "500m"
              memory: "1Gi"
            limits:
              cpu: "1500m"
@ -38,13 +34,7 @@ spec:
  environment {
    SUITE_NAME = 'data_prepper'
    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
-   SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
-   SONARQUBE_PROJECT_KEY = 'data_prepper'
-   SONARQUBE_TOKEN = credentials('sonarqube-token')
-   QUALITY_GATE_SONARQUBE_ENFORCE = '1'
    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
-   QUALITY_GATE_IRONBANK_ENFORCE = '1'
-   QUALITY_GATE_IRONBANK_REQUIRED = '1'
    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
  }
  parameters {
@ -66,75 +56,6 @@
    }
    stage('Collect quality evidence') {
      steps {
-       container('quality-tools') {
-         sh '''#!/usr/bin/env bash
-           set -euo pipefail
-           mkdir -p build
-           args=(
-             "-Dsonar.host.url=${SONARQUBE_HOST_URL}"
-             "-Dsonar.login=${SONARQUBE_TOKEN}"
-             "-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
-             "-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
-             "-Dsonar.sources=services/logging,dockerfiles"
-             "-Dsonar.inclusions=services/logging/Jenkinsfile.data-prepper,dockerfiles/Dockerfile.data-prepper"
-             "-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**"
-           )
-           set +e
-           sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
-           sonar_rc=${PIPESTATUS[0]}
-           sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}"
-           host="${SONARQUBE_HOST_URL%/}"
-           query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')"
-           sonar_ok=0
-           if [ -n "${SONARQUBE_TOKEN:-}" ]; then
-             auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')"
-             if command -v curl >/dev/null 2>&1; then
-               curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
-             elif command -v wget >/dev/null 2>&1; then
-               wget -qO "${sonar_report}" --header="Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
-             fi
-           elif command -v curl >/dev/null 2>&1; then
-             curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
-           elif command -v wget >/dev/null 2>&1; then
-             wget -qO "${sonar_report}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
-           fi
-           if [ "${sonar_ok}" -ne 1 ]; then
-             cat > "${sonar_report}" <<EOF
- {
-   "status": "ERROR",
-   "error": "sonarqube query failed"
- }
- EOF
-           fi
-           scan_root=build/data-prepper-supply-chain-scan
-           rm -rf "${scan_root}"
-           mkdir -p "${scan_root}/dockerfiles" "${scan_root}/services/logging"
-           cp dockerfiles/Dockerfile.data-prepper "${scan_root}/dockerfiles/Dockerfile.data-prepper"
-           cp services/logging/Jenkinsfile.data-prepper "${scan_root}/services/logging/Jenkinsfile.data-prepper"
-           trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL "${scan_root}"
-           trivy_rc=$?
-           set -e
-           printf '%s\n' "${sonar_rc}" > build/sonarqube-analysis.rc
-           if [ ! -s build/trivy-fs.json ]; then
-             cat > build/ironbank-compliance.json <<EOF
- {"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
- EOF
-             exit 0
-           fi
-           critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
-           high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
-           secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
-           misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
-           status=ok
-           compliant=true
-           if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
-             status=failed
-             compliant=false
-           fi
-           jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
-             '{status:$status, compliant:$compliant, category:"image_compliance", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
-         '''
-       }
        container('git') {
          sh '''
            set -euo pipefail
@ -206,148 +127,12 @@ EOF
        }
      }
    }
-   stage('Validation tests') {
-     steps {
-       container('git') {
-         sh '''#!/usr/bin/env sh
-           set -eu
-           mkdir -p build
-           failures=0
-           cases=""
-           dockerfile_present_status="skipped"
-           pipeline_config_present_status="skipped"
-           logging_kustomization_includes_data_prepper_status="skipped"
-           add_case() {
-             name="$1"
-             message="$2"
-             status="passed"
-             if [ -n "${message}" ]; then
-               status="failed"
-               failures=$((failures + 1))
-               cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'"><failure message="'"${message}"'" /></testcase>'
-             else
-               cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'" />'
-             fi
-             case "${name}" in
-               dockerfile_present) dockerfile_present_status="${status}" ;;
-               pipeline_config_present) pipeline_config_present_status="${status}" ;;
-               logging_kustomization_includes_data_prepper) logging_kustomization_includes_data_prepper_status="${status}" ;;
-             esac
-           }
-           if [ -s dockerfiles/Dockerfile.data-prepper ]; then
-             add_case "dockerfile_present" ""
-           else
-             add_case "dockerfile_present" "dockerfiles/Dockerfile.data-prepper is missing or empty"
-           fi
-           if [ -s services/logging/scripts/data_prepper_pipelines.yaml ]; then
-             add_case "pipeline_config_present" ""
-           else
-             add_case "pipeline_config_present" "data_prepper_pipelines.yaml is missing or empty"
-           fi
-           kustomization_contents="$(cat services/logging/kustomization.yaml 2>/dev/null || true)"
-           case "${kustomization_contents}" in
-             *data-prepper-helmrelease.yaml*) add_case "logging_kustomization_includes_data_prepper" "" ;;
-             *) add_case "logging_kustomization_includes_data_prepper" "services/logging/kustomization.yaml does not include data-prepper HelmRelease" ;;
-           esac
-           cat > build/junit-data-prepper.xml <<EOF
- <testsuite name="data_prepper.packaging" tests="3" failures="${failures}" errors="0" skipped="0">
- ${cases}
- </testsuite>
- EOF
-           passed=$((3 - failures))
-           cat > build/test-counts.env <<EOF
- test_passed_count=${passed}
- test_failed_count=${failures}
- test_error_count=0
- test_skipped_count=0
- EOF
-           cat > build/testcase-status.env <<EOF
- dockerfile_present_status=${dockerfile_present_status}
- pipeline_config_present_status=${pipeline_config_present_status}
- logging_kustomization_includes_data_prepper_status=${logging_kustomization_includes_data_prepper_status}
- EOF
-           if [ "${failures}" -ne 0 ]; then
-             exit 1
-           fi
-         '''
-       }
-     }
-   }
-   stage('Enforce quality gate') {
-     steps {
-       container('git') {
-         sh '''
-           set -euo pipefail
-           apk add --no-cache jq >/dev/null 2>&1 || true
-           fail=0
-           enabled() {
-             case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
-               1|true|yes|on) return 0 ;;
-               *) return 1 ;;
-             esac
-           }
-           if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
-             sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
-             [ -n "${sonar_status}" ] || sonar_status="missing"
-             case "${sonar_status}" in
-               ok|pass|passed|success) ;;
-               *)
-                 echo "sonarqube gate failed: ${sonar_status}" >&2
-                 fail=1
-                 ;;
-             esac
-           fi
-           if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
-             ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-1}"
-             compliant="$(jq -r '.compliant // empty' build/ironbank-compliance.json 2>/dev/null || true)"
-             supply_status=""
-             if [ "${compliant}" = "true" ]; then
-               supply_status="ok"
-             elif [ "${compliant}" = "false" ]; then
-               supply_status="failed"
-             else
-               supply_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
-             fi
-             [ -n "${supply_status}" ] || supply_status="missing"
-             case "${supply_status}" in
-               ok|pass|passed|success|compliant) ;;
-               not_applicable|na|n/a)
-                 if enabled "${ironbank_required}"; then
-                   echo "supply chain gate required but status=${supply_status}" >&2
-                   fail=1
-                 fi
-                 ;;
-               *)
-                 if enabled "${ironbank_required}"; then
-                   echo "supply chain gate failed: ${supply_status}" >&2
-                   fail=1
-                 else
-                   echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
-                 fi
-                 ;;
-             esac
-           fi
-           exit "${fail}"
-         '''
-       }
-     }
-   }
    stage('Build & Push') {
      steps {
        container('kaniko') {
-         withCredentials([usernamePassword(credentialsId: 'harbor-robot-streaming', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
+         withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
            sh '''
              set -euo pipefail
-             IMAGE_TAG="${IMAGE_TAG:-2.8.0}"
-             PUSH_LATEST="${PUSH_LATEST:-true}"
              if [ -z "${HARBOR_REPO:-}" ] || [ "${HARBOR_REPO}" = "registry.bstein.dev/monitoring/data-prepper" ]; then
                HARBOR_REPO="registry.bstein.dev/streaming/data-prepper"
              fi
@ -441,93 +226,34 @@ EOF
if [ "${status}" != "ok" ]; then if [ "${status}" != "ok" ]; then
gate_glue_check="failed" gate_glue_check="failed"
fi fi
metric_branch_raw="${BRANCH_NAME:-${GIT_BRANCH:-unknown}}" cat <<METRICS | curl -fsS -X PUT --data-binary @- "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" >/dev/null || \
metric_branch_raw="${metric_branch_raw#origin/}" echo "warning: metrics push failed for suite=${suite}" >&2
metric_branch="$(printf '%s' "${metric_branch_raw}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
metric_build_number="$(printf '%s' "${BUILD_NUMBER:-unknown}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
metric_jenkins_job="$(printf '%s' "${JOB_NAME:-data-prepper}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
export METRIC_SUITE="${suite}"
export METRIC_BRANCH_RAW="${metric_branch_raw}"
export METRIC_BUILD_NUMBER_RAW="${BUILD_NUMBER:-unknown}"
export METRIC_JENKINS_JOB_RAW="${JOB_NAME:-data-prepper}"
if [ ! -s build/test-counts.env ] || [ ! -s build/testcase-status.env ]; then
cat > build/test-counts.env <<EOF
test_passed_count=0
test_failed_count=0
test_error_count=0
test_skipped_count=1
EOF
cat > build/testcase-status.env <<EOF
dockerfile_present_status=skipped
pipeline_config_present_status=skipped
logging_kustomization_includes_data_prepper_status=skipped
EOF
fi
. build/testcase-status.env
if [ "${dockerfile_present_status}" = "skipped" ] && [ "${pipeline_config_present_status}" = "skipped" ] && [ "${logging_kustomization_includes_data_prepper_status}" = "skipped" ]; then
cat > build/testcase-metrics.prom <<METRICS
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="__no_test_cases__",status="skipped"} 1
METRICS
else
cat > build/testcase-metrics.prom <<METRICS
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::dockerfile_present",status="${dockerfile_present_status}"} 1
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::pipeline_config_present",status="${pipeline_config_present_status}"} 1
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::logging_kustomization_includes_data_prepper",status="${logging_kustomization_includes_data_prepper_status}"} 1
METRICS
fi
. build/test-counts.env
tests_check="ok"
if [ "$((test_failed_count + test_error_count))" -gt 0 ]; then
tests_check="failed"
fi
cat > build/platform-quality-metrics.prom <<METRICS
# TYPE platform_quality_gate_runs_total counter # TYPE platform_quality_gate_runs_total counter
platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count} platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count} platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
# TYPE data_prepper_quality_gate_tests_total gauge # TYPE data_prepper_quality_gate_tests_total gauge
data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${test_passed_count} data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${test_failed_count} data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} ${test_error_count} data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} ${test_skipped_count} data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} 0
# TYPE platform_quality_gate_workspace_line_coverage_percent gauge # TYPE platform_quality_gate_workspace_line_coverage_percent gauge
# No coverable project source is present in this packaging suite; report full platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 0
# non-applicable coverage so rollups do not confuse N/A with uncovered code.
platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 100
# TYPE platform_quality_gate_source_lines_over_500_total gauge # TYPE platform_quality_gate_source_lines_over_500_total gauge
platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0 platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0
# TYPE platform_quality_gate_build_info gauge # TYPE platform_quality_gate_test_case_result gauge
platform_quality_gate_build_info{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}"} 1 platform_quality_gate_test_case_result{suite="${suite}",test="__no_test_cases__",status="skipped"} 1
# TYPE data_prepper_quality_gate_checks_total gauge # TYPE data_prepper_quality_gate_checks_total gauge
data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="${tests_check}"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1 data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1
# TYPE platform_quality_gate_test_case_result gauge
METRICS METRICS
cat build/testcase-metrics.prom >> build/platform-quality-metrics.prom
push_status="$(
curl -sS -o build/pushgateway-response.txt -w '%{http_code}' -X PUT \
--data-binary @build/platform-quality-metrics.prom \
"${gateway}/metrics/job/platform-quality-ci/suite/${suite}" || true
)"
case "${push_status}" in
200|202) ;;
*)
echo "warning: metrics push failed for suite=${suite} status=${push_status}" >&2
cat build/pushgateway-response.txt >&2 || true
;;
esac
''' '''
} }
script { archiveArtifacts artifacts: 'build/**/*.json,build/**/*.xml,build/**/*.txt,build/**/*.rc', allowEmptyArchive: true, fingerprint: true
if (fileExists('build/junit-data-prepper.xml')) {
echo 'JUnit XML generated and archived under build/; Jenkins junit step is not installed on this controller.'
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
} }
} }
} }
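
Worth noting on the `-` side above: branch and job names are run through `jq -Rsa` before landing in label values, because Prometheus' exposition format requires backslashes, double quotes, and newlines inside label values to be escaped. A minimal Python sketch of that escaping (standalone illustration, not pipeline code; the helper names are mine):

def escape_label_value(value: str) -> str:
    # Prometheus exposition format: escape backslash, double quote, newline.
    return value.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

def render_test_case(suite: str, branch: str, test: str, status: str) -> str:
    labels = {"suite": suite, "branch": escape_label_value(branch), "test": test, "status": status}
    body = ",".join(f'{key}="{val}"' for key, val in labels.items())
    return f"platform_quality_gate_test_case_result{{{body}}} 1"

# Example with a branch name that would otherwise break the series line.
print(render_test_case("data_prepper", 'feature/"quoted"', "data_prepper.packaging::dockerfile_present", "passed"))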

View File

@@ -44,7 +44,8 @@ spec:
         path: /var/log/journal
     - name: fluentbit-state
       emptyDir:
-        sizeLimit: 1Gi
+        medium: Memory
+        sizeLimit: 64Mi
   extraVolumeMounts:
     - name: runlogjournal
       mountPath: /run/log/journal

View File

@@ -18,7 +18,6 @@ resources:
   - oneoffs/opensearch-ism-job.yaml
   - oneoffs/opensearch-dashboards-setup-job.yaml
   - oneoffs/opensearch-observability-setup-job.yaml
-  - opensearch-prune-cronjob.yaml
   - fluent-bit-helmrelease.yaml
   - node-log-rotation-daemonset.yaml
   - node-image-gc-rpi4-daemonset.yaml
@@ -46,12 +45,6 @@ configMapGenerator:
       - node_image_prune_rpi5.sh=scripts/node_image_prune_rpi5.sh
     options:
       disableNameSuffixHash: true
-  - name: opensearch-prune-script
-    namespace: logging
-    files:
-      - prune.py=scripts/opensearch_prune.py
-    options:
-      disableNameSuffixHash: true
   - name: opensearch-observability-script
     namespace: logging
     files:

View File

@@ -12,8 +12,6 @@ spec:
     type: RollingUpdate
   template:
     metadata:
-      annotations:
-        logging.bstein.dev/node-log-rotation-rev: "2026-04-27-3"
       labels:
         app: node-log-rotation
     spec:

View File

@@ -1,48 +0,0 @@
# services/logging/opensearch-prune-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: opensearch-prune
namespace: logging
spec:
schedule: "23 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 2
template:
spec:
restartPolicy: OnFailure
nodeSelector:
node-role.kubernetes.io/worker: "true"
hardware: rpi5
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
containers:
- name: prune
image: python:3.11-alpine
command: ["python", "/scripts/prune.py"]
env:
- name: OPENSEARCH_URL
value: http://opensearch-master.logging.svc.cluster.local:9200
- name: LOG_LIMIT_BYTES
value: "1099511627776"
- name: LOG_INDEX_PATTERNS
value: "kube-*,journald-*,trace-analytics-*"
volumeMounts:
- name: scripts
mountPath: /scripts
volumes:
- name: scripts
configMap:
name: opensearch-prune-script

View File

@@ -99,24 +99,4 @@ if [ "${changed}" -eq 1 ]; then
   fi
 fi
-trim_constrained_pod_logs() {
-  local base usage
-  for base in /host/mnt/astraios/var/log /host/var/log.hdd; do
-    if [ ! -d "${base}/pods" ]; then
-      continue
-    fi
-    usage="$(df -P "${base}" | awk 'NR==2 {gsub(/%/, "", $5); print $5}')"
-    if [ -z "${usage}" ] || [ "${usage}" -lt 75 ]; then
-      continue
-    fi
-    find "${base}/pods" -type f \( -name '[1-9]*.log' -o -name '*.log.20*' \) -size +1M -print -exec truncate -s 0 {} \; 2>/dev/null || true
-    if [ -d "${base}/containers" ]; then
-      find "${base}/containers" -xtype l -print -delete 2>/dev/null || true
-    fi
-  done
-}
-while true; do
-  trim_constrained_pod_logs
-  sleep 600
-done
+sleep infinity
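
The `trim_constrained_pod_logs` helper on the `-` side only acts once a log filesystem crosses 75% usage, as reported by `df -P`. A rough stdlib-Python equivalent of that gate (illustrative only; the function name is hypothetical and the paths are the ones from the script):

import os
import shutil

def needs_trim(base: str, threshold_percent: int = 75) -> bool:
    # Mirror the df -P check: percentage of the filesystem already used.
    usage = shutil.disk_usage(base)
    return usage.used * 100 // usage.total >= threshold_percent

for base in ("/host/mnt/astraios/var/log", "/host/var/log.hdd"):
    if not os.path.isdir(os.path.join(base, "pods")):
        continue  # same skip as the shell version
    print(base, "trim" if needs_trim(base) else "skip")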

View File

@@ -1,77 +0,0 @@
import json
import os
import re
import sys
import urllib.error
import urllib.request
os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/")
limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4)))
patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()]
UNITS = {
"b": 1,
"kb": 1024,
"mb": 1024**2,
"gb": 1024**3,
"tb": 1024**4,
}
def parse_size(value: str) -> int:
if not value:
return 0
text = value.strip().lower()
if text in ("-", "0"):
return 0
match = re.match(r"^([0-9.]+)([a-z]+)$", text)
if not match:
return 0
number = float(match.group(1))
unit = match.group(2)
if unit not in UNITS:
return 0
return int(number * UNITS[unit])
def request_json(path: str):
url = f"{os_url}{path}"
with urllib.request.urlopen(url, timeout=30) as response:
payload = response.read().decode("utf-8")
return json.loads(payload)
def delete_index(index: str) -> None:
url = f"{os_url}/{index}"
req = urllib.request.Request(url, method="DELETE")
with urllib.request.urlopen(req, timeout=30) as response:
_ = response.read()
print(f"deleted {index}")
indices = []
for pattern in patterns:
try:
data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date")
except urllib.error.HTTPError as exc:
if exc.code == 404:
continue
raise
for item in data:
index = item.get("index")
if not index or index.startswith("."):
continue
size = parse_size(item.get("store.size", ""))
created = int(item.get("creation.date", "0") or 0)
indices.append({"index": index, "size": size, "created": created})
total = sum(item["size"] for item in indices)
print(f"total_log_bytes={total}")
if total <= limit_bytes:
print("within limit")
sys.exit(0)
indices.sort(key=lambda item: item["created"])
for item in indices:
if total <= limit_bytes:
break
delete_index(item["index"])
total -= item["size"]
print(f"remaining_log_bytes={total}")

View File

@@ -764,15 +764,6 @@ spec:
     spec:
       template:
         spec:
-          affinity:
-            nodeAffinity:
-              preferredDuringSchedulingIgnoredDuringExecution:
-                - weight: 100
-                  preference:
-                    matchExpressions:
-                      - key: kubernetes.io/hostname
-                        operator: NotIn
-                        values: ["titan-13", "titan-15", "titan-17", "titan-19"]
           containers:
             - name: tika
               env:

View File

@@ -14,7 +14,6 @@ resources:
   - serverstransport.yaml
   - ingressroute.yaml
   - oneoffs/mailu-sync-job.yaml
-  - mailu-sync-cronjob.yaml
   - front-lb.yaml
 configMapGenerator:

View File

@@ -1,93 +0,0 @@
# services/mailu/mailu-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: mailu-sync-nightly
namespace: mailu-mailserver
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "30 4 * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "mailu-mailserver"
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret"
vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}}
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
serviceAccountName: mailu-vault-sync
containers:
- name: mailu-sync
image: python:3.11-alpine
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -euo pipefail
. /vault/scripts/mailu_vault_env.sh
pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \
&& python /app/sync.py
env:
- name: KEYCLOAK_BASE_URL
value: http://keycloak.sso.svc.cluster.local
- name: KEYCLOAK_REALM
value: atlas
- name: MAILU_DOMAIN
value: bstein.dev
- name: MAILU_DEFAULT_QUOTA
value: "20000000000"
- name: MAILU_SYSTEM_USERS
value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev"
- name: MAILU_DB_HOST
value: postgres-service.postgres.svc.cluster.local
- name: MAILU_DB_PORT
value: "5432"
volumeMounts:
- name: sync-script
mountPath: /app/sync.py
subPath: sync.py
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 200m
memory: 256Mi
volumes:
- name: sync-script
configMap:
name: mailu-sync-script
defaultMode: 0444
- name: vault-scripts
configMap:
name: mailu-vault-env
defaultMode: 0555

View File

@@ -1,5 +1,3 @@
-"""HTTP debounce wrapper for triggering the Mailu Keycloak sync job."""
 import http.server
 import json
 import os

View File

@@ -18,15 +18,13 @@ spec:
         prometheus.io/scrape: "true"
         prometheus.io/port: "8080"
         prometheus.io/path: "/metrics"
-        maintenance.bstein.dev/restart-rev: "20260413-jenkins-api-2"
+        maintenance.bstein.dev/restart-rev: "20260207-2"
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/role: "maintenance"
         vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db"
         vault.hashicorp.com/agent-inject-template-ariadne-env.sh: |
           {{ with secret "kv/data/atlas/maintenance/ariadne-db" }}
           export ARIADNE_DATABASE_URL="{{ .Data.data.database_url }}"
-          export JENKINS_API_USER="{{ .Data.data.jenkins_api_user }}"
-          export JENKINS_API_TOKEN="{{ .Data.data.jenkins_api_token }}"
           {{ end }}
           {{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
           export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
@@ -106,36 +104,6 @@ spec:
       nodeSelector:
         kubernetes.io/arch: arm64
         node-role.kubernetes.io/worker: "true"
-      affinity:
-        nodeAffinity:
-          preferredDuringSchedulingIgnoredDuringExecution:
-            - weight: 100
-              preference:
-                matchExpressions:
-                  - key: atlas.bstein.dev/spillover
-                    operator: DoesNotExist
-            - weight: 95
-              preference:
-                matchExpressions:
-                  - key: kubernetes.io/hostname
-                    operator: NotIn
-                    values:
-                      - titan-13
-                      - titan-15
-                      - titan-17
-                      - titan-19
-            - weight: 90
-              preference:
-                matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values: ["rpi5"]
-            - weight: 50
-              preference:
-                matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values: ["rpi4"]
       containers:
         - name: ariadne
           image: registry.bstein.dev/bstein/ariadne:latest
@@ -340,9 +308,9 @@ spec:
         - name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
           value: "0 */4 * * *"
         - name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
-          value: "0 0 1 1 *"
+          value: "*/15 * * * *"
         - name: ARIADNE_SCHEDULE_VAULT_OIDC
-          value: "0 0 1 1 *"
+          value: "*/15 * * * *"
         - name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
           value: "*/5 * * * *"
         - name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
@@ -377,12 +345,10 @@ spec:
           value: "15"
         - name: ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH
           value: "*/30 * * * *"
-        - name: ARIADNE_SCHEDULE_JENKINS_BUILD_WEATHER
-          value: "*/10 * * * *"
-        - name: JENKINS_BASE_URL
-          value: https://ci.bstein.dev
-        - name: JENKINS_API_TIMEOUT_SEC
-          value: "10"
+        - name: ARIADNE_SCHEDULE_METIS_K3S_TOKEN_SYNC
+          value: "11 */6 * * *"
+        - name: ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE
+          value: "*/15 * * * *"
         - name: ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP
           value: "45 */6 * * *"
         - name: JENKINS_WORKSPACE_NAMESPACE
@@ -392,7 +358,7 @@ spec:
         - name: JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS
           value: "24"
         - name: JENKINS_WORKSPACE_CLEANUP_DRY_RUN
-          value: "false"
+          value: "true"
         - name: JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN
           value: "20"
         - name: METRICS_PATH
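
The two cron expressions traded in this hunk behave very differently: `0 0 1 1 *` fires once a year, at midnight on January 1 (effectively parking a task), while `*/15 * * * *` fires every quarter hour. A quick comparison using the third-party croniter package (an assumption for illustration only; the manifests themselves never evaluate cron locally):

from datetime import datetime
from croniter import croniter

base = datetime(2026, 4, 20, 12, 0)
for expr in ("0 0 1 1 *", "*/15 * * * *"):
    it = croniter(expr, base)
    nxt = [it.get_next(datetime).isoformat() for _ in range(2)]
    print(expr, "->", nxt)  # yearly vs. every 15 minutes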

View File

@@ -16,13 +16,6 @@ rules:
   - apiGroups: [""]
     resources:
       - pods
-    verbs:
-      - get
-      - list
-      - watch
-      - delete
-  - apiGroups: [""]
-    resources:
       - persistentvolumeclaims
       - persistentvolumes
     verbs:

View File

@@ -1,53 +0,0 @@
# services/maintenance/image-sweeper-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: image-sweeper
namespace: maintenance
spec:
schedule: "30 4 * * 0"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 2
failedJobsHistoryLimit: 2
jobTemplate:
spec:
template:
spec:
serviceAccountName: node-image-sweeper
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/os: linux
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: image-sweeper
image: python:3.12.9-alpine3.20
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
env:
- name: ONE_SHOT
value: "true"
securityContext:
privileged: true
runAsUser: 0
volumeMounts:
- name: host-root
mountPath: /host
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: host-root
hostPath:
path: /
- name: script
configMap:
name: node-image-sweeper-script
defaultMode: 0555

View File

@@ -36,29 +36,11 @@ spec:
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
-  name: metis-amd64
+  name: metis
   namespace: maintenance
 spec:
   imageRepositoryRef:
     name: metis
-  filterTags:
-    pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
-    extract: '$version'
-  policy:
-    semver:
-      range: ">=0.1.0-0"
----
-apiVersion: image.toolkit.fluxcd.io/v1beta2
-kind: ImagePolicy
-metadata:
-  name: metis-arm64
-  namespace: maintenance
-spec:
-  imageRepositoryRef:
-    name: metis
-  filterTags:
-    pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
-    extract: '$version'
   policy:
     semver:
       range: ">=0.1.0-0"
@@ -77,29 +59,11 @@ spec:
 apiVersion: image.toolkit.fluxcd.io/v1beta2
 kind: ImagePolicy
 metadata:
-  name: metis-sentinel-amd64
+  name: metis-sentinel
   namespace: maintenance
 spec:
   imageRepositoryRef:
     name: metis-sentinel
-  filterTags:
-    pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
-    extract: '$version'
-  policy:
-    semver:
-      range: ">=0.1.0-0"
----
-apiVersion: image.toolkit.fluxcd.io/v1beta2
-kind: ImagePolicy
-metadata:
-  name: metis-sentinel-arm64
-  namespace: maintenance
-spec:
-  imageRepositoryRef:
-    name: metis-sentinel
-  filterTags:
-    pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
-    extract: '$version'
   policy:
     semver:
       range: ">=0.1.0-0"
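
The per-arch policies on the `-` side filter tags with a named-capture regex and keep only the extracted `version` group for semver comparison. The same pattern can be sanity-checked in plain Python (the tag values below are examples):

import re

pattern = re.compile(r"^(?P<version>0\.1\.0-\d+)-arm64$")
for tag in ("0.1.0-103-arm64", "0.1.0-103-amd64", "latest"):
    match = pattern.match(tag)
    print(tag, "->", match.group("version") if match else "filtered out")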

View File

@@ -26,7 +26,6 @@ resources:
   - metis-deployment.yaml
   - soteria-deployment.yaml
   - oneoffs/ariadne-migrate-job.yaml
-  - oneoffs/titan-24-rootfs-sweep-job.yaml
   - ariadne-service.yaml
   - soteria-service.yaml
   - disable-k3s-traefik-daemonset.yaml
@@ -48,18 +47,12 @@ resources:
   - metis-ingress.yaml
 images:
   - name: registry.bstein.dev/bstein/ariadne
-    newTag: 0.1.0-188 # {"$imagepolicy": "maintenance:ariadne:tag"}
+    newTag: 0.1.0-22 # {"$imagepolicy": "maintenance:ariadne:tag"}
   - name: registry.bstein.dev/bstein/metis
-    newTag: 0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64:tag"}
+    newTag: 0.1.0-9-amd64
   - name: registry.bstein.dev/bstein/soteria
-    newTag: 0.1.0-36 # {"$imagepolicy": "maintenance:soteria:tag"}
+    newTag: 0.1.0-35 # {"$imagepolicy": "maintenance:soteria:tag"}
 configMapGenerator:
-  - name: metis-inventory
-    namespace: maintenance
-    files:
-      - inventory.yaml=metis-inventory.yaml
-    options:
-      disableNameSuffixHash: true
   - name: disable-k3s-traefik-script
     namespace: maintenance
     files:

View File

@@ -2,12 +2,12 @@
 apiVersion: cert-manager.io/v1
 kind: Certificate
 metadata:
-  name: recovery-tls
+  name: sentinel-tls
   namespace: maintenance
 spec:
-  secretName: recovery-tls
+  secretName: sentinel-tls
   issuerRef:
     kind: ClusterIssuer
     name: letsencrypt
   dnsNames:
-    - recovery.bstein.dev
+    - sentinel.bstein.dev

View File

@@ -8,21 +8,19 @@ data:
   METIS_BIND_ADDR: :8080
   METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
   METIS_DATA_DIR: /var/lib/metis
-  METIS_DEFAULT_FLASH_HOST: titan-20
-  METIS_FLASH_HOSTS: titan-20,titan-21,titan-22,titan-24,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
-  METIS_LOCAL_HOST: titan-20
+  METIS_DEFAULT_FLASH_HOST: titan-22
+  METIS_FLASH_HOSTS: titan-22,titan-24,titan-20,titan-21,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
+  METIS_LOCAL_HOST: titan-22
   METIS_ALLOWED_GROUPS: admin,maintenance
   METIS_MAX_DEVICE_BYTES: "1000000000000"
   METIS_NAMESPACE: maintenance
-  METIS_REMOTE_POD_TIMEOUT_SEC: "14400"
-  METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-amd64"}
-  METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64"}
+  METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-23-amd64
+  METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-23-arm64
   METIS_HARBOR_REGISTRY: registry.bstein.dev
   METIS_HARBOR_PROJECT: metis
   METIS_HARBOR_API_BASE: https://registry.bstein.dev/api/v2.0
   METIS_HARBOR_USERNAME: admin
-  METIS_HOST_TMP_DIR: /var/tmp/metis-flash-test
-  METIS_REMOTE_WORKSPACE_DIR: /var/tmp/metis-workspace
+  METIS_HOST_TMP_DIR: /tmp/metis-flash-test
   METIS_SENTINEL_PUSH_URL: http://metis.maintenance.svc.cluster.local/internal/sentinel/snapshot
   METIS_SENTINEL_INTERVAL_SEC: "1800"
   METIS_SENTINEL_NSENTER: "1"

View File

@@ -2,7 +2,7 @@
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: metis-data-longhorn
+  name: metis-data
   namespace: maintenance
 spec:
   accessModes:
@@ -10,4 +10,4 @@ spec:
   resources:
     requests:
       storage: 40Gi
-  storageClassName: longhorn
+  storageClassName: local-path

View File

@@ -18,7 +18,7 @@ spec:
         prometheus.io/scrape: "true"
         prometheus.io/port: "8080"
         prometheus.io/path: "/metrics"
-        metis.bstein.dev/config-rev: "2026-04-24-01"
+        metis.bstein.dev/config-rev: "2026-04-06-02"
         vault.hashicorp.com/agent-inject: "true"
         vault.hashicorp.com/agent-pre-populate-only: "true"
         vault.hashicorp.com/role: "maintenance"
@@ -27,15 +27,9 @@ spec:
           {{ with secret "kv/data/atlas/maintenance/metis-runtime" }}
           export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}"
           {{ end }}
-        vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh: "kv/data/atlas/harbor/harbor-core"
-        vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh: |
-          {{ with secret "kv/data/atlas/harbor/harbor-core" }}
-          export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}"
-          {{ end }}
         vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh: "kv/data/atlas/maintenance/metis-ssh-keys"
         vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh: |
           {{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }}
-          export METIS_SSH_KEY_BASTION="{{ or .Data.data.bastion_pub .Data.data.brad_pub "" }}"
           export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
           export METIS_SSH_KEY_ANANKE_TETHYS="{{ or .Data.data.ananke_tethys_pub .Data.data.hecate_tethys_pub "" }}"
           export METIS_SSH_KEY_ANANKE_DB="{{ or .Data.data.ananke_db_pub .Data.data.hecate_db_pub "" }}"
@@ -43,31 +37,10 @@ spec:
     spec:
       serviceAccountName: metis
       terminationGracePeriodSeconds: 30
-      affinity:
-        nodeAffinity:
-          requiredDuringSchedulingIgnoredDuringExecution:
-            nodeSelectorTerms:
-              - matchExpressions:
-                  - key: kubernetes.io/arch
-                    operator: In
-                    values:
-                      - arm64
-                  - key: longhorn-host
-                    operator: In
-                    values:
-                      - "true"
-                  - key: node-role.kubernetes.io/worker
-                    operator: In
-                    values:
-                      - "true"
-          preferredDuringSchedulingIgnoredDuringExecution:
-            - weight: 100
-              preference:
-                matchExpressions:
-                  - key: hardware
-                    operator: In
-                    values:
-                      - rpi5
+      nodeSelector:
+        kubernetes.io/hostname: titan-22
+        kubernetes.io/arch: amd64
+        node-role.kubernetes.io/accelerator: "true"
       containers:
         - name: metis
           image: registry.bstein.dev/bstein/metis:latest
@@ -76,7 +49,6 @@ spec:
           args:
             - >-
               . /vault/secrets/metis-runtime-env.sh
-              && . /vault/secrets/metis-harbor-env.sh
               && . /vault/secrets/metis-ssh-env.sh
               && exec metis serve
           envFrom:
@@ -100,9 +72,6 @@ spec:
             periodSeconds: 5
             timeoutSeconds: 2
           volumeMounts:
-            - name: metis-inventory
-              mountPath: /etc/metis
-              readOnly: true
            - name: metis-data
              mountPath: /var/lib/metis
            - name: host-dev
@@ -124,13 +93,9 @@ spec:
             privileged: true
             runAsUser: 0
       volumes:
-        - name: metis-inventory
-          configMap:
-            name: metis-inventory
-            defaultMode: 0444
         - name: metis-data
           persistentVolumeClaim:
-            claimName: metis-data-longhorn
+            claimName: metis-data
         - name: host-dev
           hostPath:
             path: /dev

View File

@@ -12,10 +12,10 @@ metadata:
 spec:
   ingressClassName: traefik
   tls:
-    - hosts: ["recovery.bstein.dev"]
-      secretName: recovery-tls
+    - hosts: ["sentinel.bstein.dev"]
+      secretName: sentinel-tls
   rules:
-    - host: recovery.bstein.dev
+    - host: sentinel.bstein.dev
       http:
         paths:
           - path: /

View File

@@ -1,150 +0,0 @@
# services/maintenance/metis-inventory.yaml
classes:
- name: rpi5-ubuntu-worker
arch: arm64
os: ubuntu-24.04
image: ${METIS_IMAGE_RPI5_UBUNTU_WORKER}
checksum: ${METIS_IMAGE_RPI5_UBUNTU_WORKER_SHA256}
k3s_version: v1.33.3+k3s1
default_labels:
hardware: rpi5
node-role.kubernetes.io/worker: "true"
- name: rpi4-armbian-worker
arch: arm64
os: armbian-noble
image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
k3s_version: v1.31.5+k3s1
default_labels:
hardware: rpi4
node-role.kubernetes.io/worker: "true"
- name: rpi4-armbian-longhorn
arch: arm64
os: armbian-noble
image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
k3s_version: v1.31.5+k3s1
default_labels:
hardware: rpi4
node-role.kubernetes.io/worker: "true"
nodes:
- name: titan-10
class: rpi5-ubuntu-worker
hostname: titan-10
ip: 192.168.22.36
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: ubuntu
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
- name: titan-12
class: rpi4-armbian-worker
hostname: titan-12
ip: 192.168.22.40
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
- name: titan-16
class: rpi4-armbian-worker
hostname: titan-16
ip: 192.168.22.44
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
usb_scratch:
mountpoint: /mnt/scratch
label: titan-16-scratch
fs: ext4
bind_targets:
- /var/lib/rancher
- /var/log
- name: titan-13
class: rpi4-armbian-longhorn
hostname: titan-13
ip: 192.168.22.41
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 6031fa8b-f28c-45c3-b7bc-6133300e07c6
fs: ext4
- mountpoint: /mnt/asteria
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
fs: ext4
- name: titan-15
class: rpi4-armbian-longhorn
hostname: titan-15
ip: 192.168.22.43
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: f3362f14-5822-449f-944b-ac570b5cd615
fs: ext4
- mountpoint: /mnt/asteria
uuid: 9c5316e6-f847-4884-b502-11f2d0d15d6f
fs: ext4
- name: titan-17
class: rpi4-armbian-longhorn
hostname: titan-17
ip: 192.168.22.45
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 1fecdade-08b0-49cb-9ae3-be6c188b0a96
fs: ext4
- mountpoint: /mnt/asteria
uuid: 2fe9f613-d372-47ca-b84f-82084e4edda0
fs: ext4
- name: titan-19
class: rpi4-armbian-longhorn
hostname: titan-19
ip: 192.168.22.47
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 4890abb9-dda2-4f4f-9c0f-081ee82849cf
fs: ext4
- mountpoint: /mnt/asteria
uuid: 2b4ea28d-b0e6-4fa3-841b-cd7067ae9153
fs: ext4
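
The inventory is full of `${VAR}` placeholders (`${METIS_K3S_TOKEN}`, the SSH keys, the image URLs) that metis presumably resolves from its injected environment when it loads the file. As it happens, Python's `string.Template` uses the same `${NAME}` syntax, so that substitution step can be sketched like this (sample value only, not a real secret):

from string import Template

line = "k3s_token: ${METIS_K3S_TOKEN}"
rendered = Template(line).safe_substitute({"METIS_K3S_TOKEN": "redacted-example"})
print(rendered)  # -> k3s_token: redacted-example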

View File

@@ -1,55 +0,0 @@
# services/maintenance/metis-k3s-token-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: metis-k3s-token-sync
namespace: maintenance
spec:
schedule: "11 */6 * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 2
jobTemplate:
spec:
template:
spec:
serviceAccountName: metis-token-sync
restartPolicy: OnFailure
nodeName: titan-0a
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: sync
image: hashicorp/vault:1.17.6
imagePullPolicy: IfNotPresent
command:
- /bin/sh
- -c
args:
- |
set -eu
token="$(tr -d '\n' < /host/var/lib/rancher/k3s/server/token)"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
VAULT_TOKEN="$(vault write -field=token auth/kubernetes/login role="${VAULT_K8S_ROLE}" jwt="${jwt}")"
export VAULT_TOKEN
vault kv put kv/atlas/maintenance/metis-runtime k3s_token="${token}"
env:
- name: VAULT_ADDR
value: http://vault.vault.svc.cluster.local:8200
- name: VAULT_K8S_ROLE
value: maintenance-metis-token-sync
securityContext:
runAsUser: 0
volumeMounts:
- name: k3s-server
mountPath: /host/var/lib/rancher/k3s/server
readOnly: true
volumes:
- name: k3s-server
hostPath:
path: /var/lib/rancher/k3s/server
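
The sync container's two `vault` CLI calls correspond to plain HTTP requests against Vault's API: a Kubernetes-auth login using the pod's service-account JWT, then a KV v2 write (the CLI path `kv/atlas/...` maps to `kv/data/atlas/...`, matching the agent-inject annotations elsewhere in this diff). A stdlib-only sketch of the same flow, error handling omitted:

import json
import urllib.request

VAULT_ADDR = "http://vault.vault.svc.cluster.local:8200"

def vault_post(path, payload, token=None):
    # POST a JSON body to a Vault API path, optionally authenticated.
    req = urllib.request.Request(
        f"{VAULT_ADDR}/v1/{path}",
        data=json.dumps(payload).encode(),
        method="POST",
    )
    if token:
        req.add_header("X-Vault-Token", token)
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read() or b"{}")

jwt = open("/var/run/secrets/kubernetes.io/serviceaccount/token").read()
k3s_token = open("/host/var/lib/rancher/k3s/server/token").read().strip()
login = vault_post("auth/kubernetes/login", {"role": "maintenance-metis-token-sync", "jwt": jwt})
vault_post(
    "kv/data/atlas/maintenance/metis-runtime",
    {"data": {"k3s_token": k3s_token}},
    token=login["auth"]["client_token"],
)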

View File

@@ -12,7 +12,6 @@ rules:
       - list
       - watch
       - delete
-      - patch
 ---
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role

View File

@@ -10,8 +10,6 @@ spec:
       app: metis-sentinel-amd64
   updateStrategy:
     type: RollingUpdate
-    rollingUpdate:
-      maxUnavailable: 25%
   template:
     metadata:
       labels:
@@ -31,7 +29,7 @@ spec:
         kubernetes.io/arch: amd64
       containers:
         - name: metis-sentinel
-          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-sentinel-amd64"}
+          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-amd64
           imagePullPolicy: Always
           envFrom:
             - configMapRef:

View File

@@ -10,8 +10,6 @@ spec:
       app: metis-sentinel-arm64
   updateStrategy:
     type: RollingUpdate
-    rollingUpdate:
-      maxUnavailable: 25%
   template:
     metadata:
       labels:
@@ -31,7 +29,7 @@ spec:
         kubernetes.io/arch: arm64
       containers:
         - name: metis-sentinel
-          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-sentinel-arm64"}
+          image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-arm64
           imagePullPolicy: Always
           envFrom:
             - configMapRef:

View File

@@ -6,7 +6,7 @@ metadata:
   namespace: maintenance
   annotations:
     prometheus.io/scrape: "true"
-    prometheus.io/port: "8080"
+    prometheus.io/port: "80"
     prometheus.io/path: "/metrics"
 spec:
   type: ClusterIP

View File

@@ -74,7 +74,7 @@ spec:
           args:
             - --provider=oidc
             - --config=/vault/secrets/oidc-config
-            - --redirect-url=https://recovery.bstein.dev/oauth2/callback
+            - --redirect-url=https://sentinel.bstein.dev/oauth2/callback
             - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
             - --scope=openid profile email groups
             - --email-domain=*
@@ -96,7 +96,7 @@ spec:
             - --approval-prompt=auto
             - --skip-jwt-bearer-tokens=true
             - --oidc-groups-claim=groups
-            - --cookie-domain=recovery.bstein.dev
+            - --cookie-domain=sentinel.bstein.dev
           ports:
             - containerPort: 4180
               name: http

View File

@@ -1,70 +0,0 @@
# services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
# One-off emergency cleanup for titan-24 rootfs pressure.
# Safe to delete the finished Job/pod after it succeeds.
apiVersion: batch/v1
kind: Job
metadata:
name: titan-24-rootfs-sweep
namespace: maintenance
annotations:
kustomize.toolkit.fluxcd.io/force: "true"
spec:
backoffLimit: 6
ttlSecondsAfterFinished: 3600
template:
metadata:
labels:
app: titan-24-rootfs-sweep
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/hostname: titan-24
tolerations:
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoExecute
tolerationSeconds: 300
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoExecute
tolerationSeconds: 300
containers:
- name: sweep
image: python:3.12.9-alpine3.20
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
env:
- name: ONE_SHOT
value: "true"
- name: HIGH_USAGE_PERCENT
value: "0"
- name: EMERGENCY_USAGE_PERCENT
value: "0"
- name: LOG_RETENTION_DAYS
value: "1"
- name: ORPHAN_POD_RETENTION_DAYS
value: "0"
- name: JOURNAL_MAX_SIZE
value: "100M"
securityContext:
privileged: true
runAsUser: 0
volumeMounts:
- name: host-root
mountPath: /host
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: host-root
hostPath:
path: /
- name: script
configMap:
name: node-image-sweeper-script
defaultMode: 0555

Some files were not shown because too many files have changed in this diff.