Compare commits

..

42 Commits

Author SHA1 Message Date
jenkins
9a86c350dd quality(titan-iac): split metrics publisher and harden gate lint 2026-04-20 15:20:56 -03:00
jenkins
d342053196 ci(data-prepper): emit placeholder per-test metric series 2026-04-20 12:06:25 -03:00
jenkins
c3cca8ad9a monitoring(testing): add per-test history panels and metric emitter 2026-04-20 12:03:27 -03:00
jenkins
9103cd22f2 ci(data-prepper): add retention and archive quality artifacts 2026-04-20 10:49:54 -03:00
094d202803 monitoring: remove combined UPS draw series from history panels 2026-04-19 14:50:24 -03:00
411bc6b90d monitoring: elevate Atlas Testing dashboard and no-data fallbacks 2026-04-18 14:50:59 -03:00
26b8f23426 quality-gate: emit workspace coverage and LOC gauges 2026-04-17 05:47:38 -03:00
629df65c7b monitoring(soteria): tune PVC backup age thresholds for nightly cadence 2026-04-14 02:14:43 -03:00
e5a824e4e1 typhon: register app and add v2-safe ble/control runtime toggles 2026-04-13 22:02:57 -03:00
6815a67c1f maintenance(soteria): roll out 0.1.0-35 2026-04-13 16:51:46 -03:00
deefdb53ad maintenance(soteria): roll out 0.1.0-34 2026-04-13 14:23:24 -03:00
4e4c310cd4 maintenance(soteria): roll out 0.1.0-33 2026-04-13 13:58:44 -03:00
df79cad1c3 maintenance(soteria): grant pod logs and roll out 0.1.0-32 2026-04-13 12:51:38 -03:00
b3d8b13f39 maintenance(soteria): roll pvc-node pin fix and pod-read rbac 2026-04-13 03:32:25 -03:00
a23b6a4b93 maintenance(soteria): move restic vault path to shared scope 2026-04-13 03:01:29 -03:00
38abbd9fe1 maintenance(vault): roll sync pod after soteria secret mapping 2026-04-13 02:55:42 -03:00
ac12a9bfed maintenance(soteria): source restic credentials from vault 2026-04-13 02:54:05 -03:00
8a371e1267 monitoring(alerts): make soteria backup health rule driver-agnostic 2026-04-13 02:38:53 -03:00
f25186ef7e maintenance(soteria): switch to encrypted restic backups 2026-04-13 02:14:39 -03:00
a01dc0813a maintenance(soteria): enable b2 usage scan config and alert 2026-04-12 19:47:58 -03:00
609cfcb696 monitoring: force horizontal stat layout for power/climate panels 2026-04-12 19:04:35 -03:00
75a992b829 maintenance(soteria): tighten oauth2 ingress and drill validation 2026-04-12 14:58:25 -03:00
a87a5f7bff monitoring: fix typhon low-threshold alert semantics 2026-04-12 14:56:34 -03:00
a1c8a99866 monitoring(alerts): watch soteria authz denial spikes 2026-04-12 12:19:42 -03:00
7b3dfa335b maintenance(soteria): harden ingress path and add backup alerts 2026-04-12 12:12:43 -03:00
e1bba18b52 maintenance: set explicit jenkins cleanup schedule 2026-04-12 11:36:50 -03:00
52882f1bb5 maintenance(soteria): add serviceaccount and rbac manifests 2026-04-12 11:36:33 -03:00
5128741c53 maintenance: default jenkins cleanup to dry-run 2026-04-12 11:28:48 -03:00
96f923ae4c maintenance(soteria): add protected UI, OIDC bootstrap, and backup health panel wiring 2026-04-12 11:16:29 -03:00
95bc3953d1 maintenance: wire jenkins cleanup permissions 2026-04-12 11:00:50 -03:00
f4e921bb33 scheduling: keep app workloads off control-plane 2026-04-12 04:26:52 -03:00
616c6308b1 maintenance: remove pi-usb-scratch guard rollout 2026-04-12 01:02:41 -03:00
d9b30d6c5b maintenance(pi-usb-scratch): skip k3s runtime rsync during cutover 2026-04-11 12:11:15 -03:00
7c337ad5a1 maintenance(pi-usb-scratch): disable rollout jitter for initial cutover 2026-04-11 12:00:30 -03:00
3823b68ee2 maintenance(pi-usb-scratch): fix false mount conflict detection 2026-04-11 11:57:50 -03:00
40de2b59a5 maintenance: enforce Astraios + tmpfs /tmp on worker Pis 2026-04-11 11:54:43 -03:00
5483c04bb3 maintenance: add worker pi usb scratch rollout 2026-04-11 01:03:42 -03:00
64b4f14018 ariadne: remove remaining cronjobs and migrate schedule ownership 2026-04-10 22:40:58 -03:00
166020ca1d ariadne: migrate glue cronjobs to schedules 2026-04-10 21:22:35 -03:00
60446ee830 testing(ci): centralize quality gate contract 2026-04-10 17:06:53 -03:00
c38b6c5e27 ci: publish titan-iac tests and seed ananke/lesavka jobs 2026-04-10 16:38:55 -03:00
9419c4b26b dashboards: unify suite pass-rate metrics on platform counters 2026-04-10 15:35:20 -03:00
160 changed files with 7270 additions and 24068 deletions

306
Jenkinsfile vendored
View File

@ -12,19 +12,8 @@ spec:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -34,21 +23,8 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
SUITE_NAME = 'titan-iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -58,175 +34,7 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
stage('Prepare local quality evidence') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Run quality gate') {
@ -256,96 +64,8 @@ PY
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
set -eu
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
'''
}
}
@ -354,7 +74,7 @@ PY
script {
env.FLUX_BRANCH = sh(
returnStdout: true,
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
).trim()
if (!env.FLUX_BRANCH) {
error('Flux branch not found in gotk-sync.yaml')
@ -373,20 +93,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"

View File

@ -11,19 +11,8 @@ spec:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: jnlp
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
resources:
requests:
cpu: "25m"
memory: "256Mi"
- name: python
image: registry.bstein.dev/bstein/python:3.12-slim
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
image: python:3.12-slim
command:
- cat
tty: true
@ -33,21 +22,8 @@ spec:
environment {
PIP_DISABLE_PIP_VERSION_CHECK = '1'
PYTHONUNBUFFERED = '1'
SUITE_NAME = 'titan_iac'
SUITE_NAME = 'titan-iac'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'titan_iac'
SONARQUBE_TOKEN = credentials('sonarqube-token')
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '0'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
options {
disableConcurrentBuilds()
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
}
stages {
stage('Checkout') {
@ -57,175 +33,7 @@ spec:
}
stage('Install deps') {
steps {
sh '''
set -eu
if ! command -v git >/dev/null 2>&1; then
apt-get update
apt-get install -y --no-install-recommends git ca-certificates
rm -rf /var/lib/apt/lists/*
fi
pip install --no-cache-dir -r ci/requirements.txt
'''
}
}
stage('Prepare local quality evidence') {
steps {
sh '''
set -eu
mkdir -p build
set +e
python3 -m testing.quality_gate --profile local --build-dir build
local_quality_rc=$?
set -e
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
'''
}
}
stage('Collect SonarQube evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=."
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
)
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
rc=${PIPESTATUS[0]}
set -e
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
'''
}
sh '''
set -eu
mkdir -p build
python3 - <<'PY'
import base64
import json
import os
import time
import urllib.parse
import urllib.request
from pathlib import Path
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
token = os.getenv('SONARQUBE_TOKEN', '').strip()
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
payload = {
"status": "ERROR",
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
}
if host and project_key:
task_file = Path('.scannerwork/report-task.txt')
task_id = ''
if task_file.exists():
for line in task_file.read_text(encoding='utf-8').splitlines():
key, _, value = line.partition('=')
if key == 'ceTaskId':
task_id = value.strip()
break
if task_id:
ce_query = urllib.parse.urlencode({"id": task_id})
deadline = time.monotonic() + 180
while time.monotonic() < deadline:
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
ce_request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(ce_request, timeout=12) as response:
ce_payload = json.loads(response.read().decode("utf-8"))
except Exception:
time.sleep(3)
continue
status = str(ce_payload.get("task", {}).get("status", "")).upper()
if status in {"SUCCESS", "FAILED", "CANCELED"}:
break
time.sleep(3)
query = urllib.parse.urlencode({"projectKey": project_key})
request = urllib.request.Request(
f"{host}/api/qualitygates/project_status?{query}",
method="GET",
)
if token:
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
request.add_header("Authorization", f"Basic {encoded}")
try:
with urllib.request.urlopen(request, timeout=12) as response:
payload = json.loads(response.read().decode("utf-8"))
except Exception as exc: # noqa: BLE001
payload = {"status": "ERROR", "error": str(exc)}
with open(report_path, "w", encoding="utf-8") as handle:
json.dump(payload, handle, indent=2, sort_keys=True)
handle.write("\\n")
PY
'''
}
}
stage('Collect IronBank evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
set +e
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
trivy_rc=$?
set -e
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
'''
}
sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
if report_path.exists():
raise SystemExit(0)
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
payload = {
"status": status or "unknown",
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
}
payload = {k: v for k, v in payload.items() if v is not None}
if "status" not in payload:
payload["status"] = "unknown"
payload["note"] = (
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
"or write build/ironbank-compliance.json in image-building repos."
)
report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
sh 'pip install --no-cache-dir -r ci/requirements.txt'
}
}
stage('Run quality gate') {
@ -255,96 +63,8 @@ PY
stage('Enforce quality gate') {
steps {
sh '''
set -euo pipefail
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
if [ "${gate_rc}" -ne 0 ]; then
echo "quality gate failed with rc=${gate_rc}" >&2
fail=1
fi
enabled() {
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
1|true|yes|on) return 0 ;;
*) return 1 ;;
esac
}
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
case "${sonar_status}" in
ok|pass|passed|success) ;;
*)
echo "sonarqube gate failed: ${sonar_status}" >&2
fail=1
;;
esac
fi
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
supply_status="$(python3 - <<'PY'
import json
from pathlib import Path
path = Path("build/ironbank-compliance.json")
if not path.exists():
print("missing")
raise SystemExit(0)
try:
payload = json.loads(path.read_text(encoding="utf-8"))
except Exception: # noqa: BLE001
print("error")
raise SystemExit(0)
compliant = payload.get("compliant")
if compliant is True:
print("ok")
elif compliant is False:
print("failed")
else:
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
print(status or "missing")
PY
)"
case "${supply_status}" in
ok|pass|passed|success|compliant) ;;
not_applicable|na|n/a)
if enabled "${ironbank_required}"; then
echo "supply chain gate required but status=${supply_status}" >&2
fail=1
fi
;;
*)
if enabled "${ironbank_required}"; then
echo "supply chain gate failed: ${supply_status}" >&2
fail=1
else
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
fi
;;
esac
fi
exit "${fail}"
set -eu
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
'''
}
}
@ -372,20 +92,6 @@ PY
steps {
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
sh '''
set -euo pipefail
if ! command -v git >/dev/null 2>&1; then
if command -v apk >/dev/null 2>&1; then
apk add --no-cache git >/dev/null
elif command -v apt-get >/dev/null 2>&1; then
apt-get update >/dev/null
apt-get install -y git >/dev/null
fi
fi
cd "${WORKSPACE:-$PWD}"
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
echo "workspace is not a git checkout; skipping promote"
exit 0
fi
set +x
git config user.email "jenkins@bstein.dev"
git config user.name "jenkins"

View File

@ -6,14 +6,10 @@ from __future__ import annotations
import json
import os
from glob import glob
from pathlib import Path
import sys
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from ci.scripts import publish_test_metrics_quality as _quality_helpers
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
@ -187,7 +183,6 @@ def _build_payload(
failed_count: int,
branch: str,
build_number: str,
jenkins_job: str,
summary: dict | None = None,
workspace_line_coverage_percent: float = 0.0,
source_lines_over_500: int = 0,
@ -200,15 +195,8 @@ def _build_payload(
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
)
test_case_base_labels = {
"suite": suite,
"branch": branch or "unknown",
"build_number": build_number or "unknown",
"jenkins_job": jenkins_job or suite,
}
lines = [
"# TYPE platform_quality_gate_runs_total counter",
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
@ -221,8 +209,6 @@ def _build_payload(
"# TYPE titan_iac_quality_gate_run_status gauge",
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
"# TYPE platform_quality_gate_build_info gauge",
f"platform_quality_gate_build_info{build_labels} 1",
"# TYPE titan_iac_quality_gate_build_info gauge",
f"titan_iac_quality_gate_build_info{build_labels} 1",
"# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
@ -240,18 +226,12 @@ def _build_payload(
lines.append("# TYPE platform_quality_gate_test_case_result gauge")
if test_cases:
for test_name, test_status in test_cases:
labels = {
**test_case_base_labels,
"test": test_name,
"status": test_status,
}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
f'platform_quality_gate_test_case_result{{suite="{suite}",test="{_escape_label(test_name)}",status="{_escape_label(test_status)}"}} 1'
)
else:
labels = {**test_case_base_labels, "test": "__no_test_cases__", "status": "skipped"}
lines.append(
f"platform_quality_gate_test_case_result{_label_str(labels)} 1"
f'platform_quality_gate_test_case_result{{suite="{suite}",test="__no_test_cases__",status="skipped"}} 1'
)
return "\n".join(lines) + "\n"
@ -264,11 +244,8 @@ def main() -> int:
junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
if branch.startswith("origin/"):
branch = branch[len("origin/") :]
branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", ""))
build_number = os.getenv("BUILD_NUMBER", "")
jenkins_job = os.getenv("JOB_NAME", "titan-iac")
tests = _collect_junit_totals(junit_glob)
test_cases = _collect_junit_cases(junit_glob)
@ -322,7 +299,6 @@ def main() -> int:
failed_count=failed_count,
branch=branch,
build_number=build_number,
jenkins_job=jenkins_job,
summary=summary,
workspace_line_coverage_percent=workspace_line_coverage_percent,
source_lines_over_500=source_lines_over_500,

View File

@ -1,173 +0,0 @@
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
from __future__ import annotations
import argparse
import datetime as dt
import json
from pathlib import Path
from typing import Any
# Upper-case severities a failed misconfiguration must carry to be reported by
# _iter_failed_misconfigurations.
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
def _read_json(path: Path) -> dict[str, Any]:
"""Read a JSON object from disk for use as pipeline evidence."""
payload = json.loads(path.read_text(encoding="utf-8"))
if not isinstance(payload, dict):
raise ValueError(f"{path} must contain a JSON object")
return payload
def _parse_day(raw: str | None) -> dt.date | None:
"""Parse an ISO day while letting optional waiver dates stay optional."""
if not raw:
return None
return dt.date.fromisoformat(raw)
def _today(override: str | None = None) -> dt.date:
    """Resolve the policy day: the parsed ``override`` if given, else today's date.

    The override exists so callers (and tests) can pin waiver-expiry behavior.
    """
    pinned_day = _parse_day(override)
    return pinned_day or dt.date.today()
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
"""Return active ``(misconfiguration id, target)`` waivers and expired count."""
if path is None or not path.exists():
return set(), 0
payload = _read_json(path)
default_expires_at = payload.get("default_expires_at")
active: set[tuple[str, str]] = set()
expired = 0
for entry in payload.get("misconfigurations", []):
if not isinstance(entry, dict):
continue
misconfiguration_id = str(entry.get("id") or "").strip()
if not misconfiguration_id:
continue
expires_at = _parse_day(str(entry.get("expires_at") or default_expires_at or ""))
targets = entry.get("targets", [])
if not isinstance(targets, list):
continue
if expires_at and expires_at < policy_day:
expired += len(targets)
continue
# Waivers are target-specific so a new unsafe manifest fails until it is
# either fixed or deliberately accepted with a fresh expiration.
for target in targets:
if isinstance(target, str) and target:
active.add((misconfiguration_id, target))
return active, expired
def _iter_failed_misconfigurations(payload: dict[str, Any]):
"""Yield failed high/critical Trivy misconfiguration records."""
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
target = str(result.get("Target") or "")
for item in result.get("Misconfigurations") or []:
if not isinstance(item, dict):
continue
if item.get("Status") != "FAIL":
continue
if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
continue
yield target, item
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
"""Count Trivy vulnerabilities at a specific severity."""
count = 0
for result in payload.get("Results", []):
if not isinstance(result, dict):
continue
for item in result.get("Vulnerabilities") or []:
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
count += 1
return count
def _count_secrets(payload: dict[str, Any]) -> int:
"""Count detected secrets in the Trivy filesystem report."""
count = 0
for result in payload.get("Results", []):
if isinstance(result, dict):
count += len(result.get("Secrets") or [])
return count
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Summarise Trivy filesystem evidence into the compliance report dict.

    A failed misconfiguration is "open" unless its ``(ID, target)`` pair has an
    active waiver. The report is ``ok`` only when there are no CRITICAL
    vulnerabilities, no secrets and no open misconfigurations; HIGH
    vulnerabilities are counted but do not influence the status.

    Args:
        trivy_payload: Parsed Trivy JSON report.
        waiver_path: Optional waiver file forwarded to the waiver loader.
        today_override: Optional ISO day used instead of today's date when
            deciding which waivers have expired.

    Returns:
        The report mapping, including at most 20 open misconfiguration examples.
    """
    evaluation_day = _today(today_override)
    waivers, expired_count = _load_waiver_pairs(waiver_path, evaluation_day)

    unwaived: list[dict[str, str]] = []
    waived_count = 0
    for target, finding in _iter_failed_misconfigurations(trivy_payload):
        finding_id = str(finding.get("ID") or "")
        if (finding_id, target) not in waivers:
            unwaived.append(
                {
                    "id": finding_id,
                    "target": target,
                    "severity": str(finding.get("Severity") or ""),
                    "title": str(finding.get("Title") or ""),
                }
            )
        else:
            waived_count += 1

    critical_count = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high_count = _count_vulnerabilities(trivy_payload, "HIGH")
    secret_count = _count_secrets(trivy_payload)

    passed = critical_count == 0 and secret_count == 0 and not unwaived
    return {
        "status": "ok" if passed else "failed",
        "compliant": passed,
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical_count,
        "high_vulnerabilities": high_count,
        "high_vulnerability_policy": "observe",
        "secrets": secret_count,
        "high_or_critical_misconfigurations": len(unwaived),
        "waived_misconfigurations": waived_count,
        "expired_waivers": expired_count,
        "waiver_file": str(waiver_path) if waiver_path else "",
        "open_misconfiguration_examples": unwaived[:20],
    }
def main(argv: list[str] | None = None) -> int:
    """Command-line entrypoint: read a Trivy JSON report and write the summary file.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Always ``0`` once the report has been written.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, options in (
        ("--trivy-json", {"required": True}),
        ("--waivers", {}),
        ("--output", {"required": True}),
        ("--today", {}),
    ):
        cli.add_argument(flag, **options)
    opts = cli.parse_args(argv)

    scan = _read_json(Path(opts.trivy_json))
    waivers = Path(opts.waivers) if opts.waivers else None
    summary = build_report(scan, waiver_path=waivers, today_override=opts.today)

    destination = Path(opts.output)
    # Create missing parent directories so the write below cannot fail on them.
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(summary, indent=2, sort_keys=True) + "\n"
    destination.write_text(rendered, encoding="utf-8")
    return 0
if __name__ == "__main__":  # pragma: no cover
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())

View File

@ -1,18 +1,52 @@
max_success_age_hours: 48
allow_suspended:
- bstein-dev-home/vaultwarden-cred-sync
- comms/guest-name-randomizer
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
- finance/firefly-user-sync
- health/wger-admin-ensure
- health/wger-user-sync
- mailu-mailserver/mailu-sync-nightly
- nextcloud/nextcloud-mail-sync
- vault/vault-oidc-config
ariadne_schedule_tasks:
- schedule.mailu_sync
- schedule.nextcloud_sync
- schedule.vaultwarden_sync
- schedule.wger_admin
- task: schedule.mailu_sync
check_last_success: false
- task: schedule.nextcloud_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.nextcloud_cron
check_last_success: true
max_success_age_hours: 48
- task: schedule.nextcloud_maintenance
check_last_success: false
- task: schedule.vaultwarden_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.wger_user_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.wger_admin
check_last_success: false
- task: schedule.firefly_user_sync
check_last_success: true
max_success_age_hours: 48
- task: schedule.firefly_cron
check_last_success: false
- task: schedule.vault_k8s_auth
check_last_success: false
- task: schedule.vault_oidc
check_last_success: false
- task: schedule.comms_guest_name
check_last_success: true
max_success_age_hours: 48
- task: schedule.comms_pin_invite
check_last_success: false
- task: schedule.comms_reset_room
check_last_success: false
- task: schedule.comms_seed_room
check_last_success: true
max_success_age_hours: 48
- task: schedule.pod_cleaner
check_last_success: true
max_success_age_hours: 6
- task: schedule.opensearch_prune
check_last_success: false
- task: schedule.image_sweeper
check_last_success: true
max_success_age_hours: 18
- task: schedule.metis_k3s_token_sync
check_last_success: true
max_success_age_hours: 12
- task: schedule.platform_quality_suite_probe
check_last_success: true
max_success_age_hours: 2

View File

@ -1,5 +1,3 @@
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
from __future__ import annotations
import os
@ -28,29 +26,11 @@ def _query(promql: str) -> list[dict]:
def _expected_tasks() -> list[dict]:
    """Return the configured Ariadne schedule tasks as normalised dicts.

    Every ``ariadne_schedule_tasks`` entry is passed through
    ``_normalize_task``, so callers always receive dicts even when the config
    lists bare task names.

    Raises:
        AssertionError: If no tasks are configured (key missing, null, or empty).
        TypeError: Propagated from ``_normalize_task`` for unsupported entries.
    """
    cfg = _load_config()
    # ``or []`` covers a key that is present but null in the YAML.
    raw_entries = cfg.get("ariadne_schedule_tasks") or []
    # Keep the normalised list: re-reading the raw config here would hand
    # un-normalised string entries to callers that expect dicts.
    tasks = [_normalize_task(item, cfg) for item in raw_entries]
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"

View File

@ -1,46 +0,0 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import yaml
from kubernetes import client, config
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Parse the YAML file at ``CONFIG_PATH``; an empty document yields ``{}``."""
    with CONFIG_PATH.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed or {}
def _load_kube():
    """Configure the Kubernetes client.

    In-cluster configuration is tried first; if that raises
    ``config.ConfigException`` the local kubeconfig is loaded instead.
    """
    try:
        config.load_incluster_config()
    except config.ConfigException:
        # In-cluster config unavailable; fall back to the kubeconfig loader.
        config.load_kube_config()
def test_glue_cronjobs_recent_success():
    """Every labelled glue CronJob must have succeeded recently or be an allowed suspension.

    CronJobs are selected cluster-wide by the ``atlas.bstein.dev/glue=true``
    label. Suspended ones must appear in the config's ``allow_suspended`` list;
    all others must report a last successful run no older than
    ``max_success_age_hours`` (48 when unset).
    """
    settings = _load_config()
    age_limit_hours = int(settings.get("max_success_age_hours", 48))
    suspended_ok = set(settings.get("allow_suspended", []))

    _load_kube()
    listing = client.BatchV1Api().list_cron_job_for_all_namespaces(
        label_selector="atlas.bstein.dev/glue=true"
    )
    found = listing.items
    assert found, "No glue cronjobs found with atlas.bstein.dev/glue=true"

    reference_time = datetime.now(timezone.utc)
    for cj in found:
        qualified = f"{cj.metadata.namespace}/{cj.metadata.name}"
        if cj.spec.suspend:
            # Suspended jobs are exempt from the freshness check, but only
            # when explicitly allow-listed.
            assert qualified in suspended_ok, f"{qualified} is suspended but not in allow_suspended"
            continue
        succeeded_at = cj.status.last_successful_time
        assert succeeded_at is not None, f"{qualified} has no lastSuccessfulTime"
        hours_since = (reference_time - succeeded_at).total_seconds() / 3600
        assert hours_since <= age_limit_hours, f"{qualified} last success {hours_since:.1f}h ago"

View File

@ -1,5 +1,3 @@
"""Glue checks for the metrics the quality-gate publishes."""
from __future__ import annotations
import os
@ -27,29 +25,11 @@ def _query(promql: str) -> list[dict]:
def _expected_tasks() -> list[dict]:
    """Return the configured Ariadne schedule tasks as normalised dicts.

    Every ``ariadne_schedule_tasks`` entry is passed through
    ``_normalize_task``, so callers always receive dicts even when the config
    lists bare task names.

    Raises:
        AssertionError: If no tasks are configured (key missing, null, or empty).
        TypeError: Propagated from ``_normalize_task`` for unsupported entries.
    """
    cfg = _load_config()
    # ``or []`` covers a key that is present but null in the YAML.
    raw_entries = cfg.get("ariadne_schedule_tasks") or []
    # Keep the normalised list: re-reading the raw config here would hand
    # un-normalised string entries to callers that expect dicts.
    tasks = [_normalize_task(item, cfg) for item in raw_entries]
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks
def _normalize_task(item: object, cfg: dict) -> dict:
if isinstance(item, str):
return {
"task": item,
"check_last_success": True,
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
}
if isinstance(item, dict):
normalized = dict(item)
normalized.setdefault("check_last_success", True)
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
return normalized
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
tracked = [item for item in tasks if item.get("check_last_success")]
assert tracked, "No Ariadne schedule tasks are marked for success tracking"

View File

@ -1,401 +0,0 @@
{
"version": 1,
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
"default_expires_at": "2026-05-22",
"ticket": "atlas-quality-wave-k8s-hardening",
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
"misconfigurations": [
{
"id": "DS-0002",
"targets": [
"dockerfiles/Dockerfile.ananke-node-helper"
]
},
{
"id": "KSV-0009",
"targets": [
"services/mailu/vip-controller.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml"
]
},
{
"id": "KSV-0010",
"targets": [
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0014",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/guest-register-deployment.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/actual-budget-deployment.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/deployment.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud-mail-sync/cronjob.yaml",
"services/nextcloud/collabora.yaml",
"services/nextcloud/cronjob.yaml",
"services/nextcloud/deployment.yaml",
"services/nextcloud/maintenance-cronjob.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/planka/deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vault/statefulset.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0017",
"targets": [
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml"
]
},
{
"id": "KSV-0041",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"infrastructure/traefik/clusterrole.yaml",
"services/bstein-dev-home/rbac.yaml",
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/comms/mas-secrets-ensure-rbac.yaml",
"services/maintenance/soteria-rbac.yaml"
]
},
{
"id": "KSV-0047",
"targets": [
"services/monitoring/rbac.yaml"
]
},
{
"id": "KSV-0053",
"targets": [
"services/comms/comms-secrets-ensure-rbac.yaml",
"services/comms/mas-db-ensure-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/ariadne-rbac.yaml"
]
},
{
"id": "KSV-0056",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
"services/jenkins/serviceaccount.yaml",
"services/maintenance/disable-k3s-traefik-rbac.yaml",
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0114",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
]
},
{
"id": "KSV-0118",
"targets": [
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
"infrastructure/core/coredns-deployment.yaml",
"infrastructure/core/ntp-sync-daemonset.yaml",
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
"infrastructure/postgres/statefulset.yaml",
"infrastructure/vault-csi/vault-csi-provider.yaml",
"services/ai-llm/deployment.yaml",
"services/bstein-dev-home/backend-deployment.yaml",
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
"services/bstein-dev-home/frontend-deployment.yaml",
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
"services/bstein-dev-home/vault-sync-deployment.yaml",
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
"services/comms/atlasbot-deployment.yaml",
"services/comms/coturn.yaml",
"services/comms/element-call-deployment.yaml",
"services/comms/guest-name-job.yaml",
"services/comms/livekit-token-deployment.yaml",
"services/comms/livekit.yaml",
"services/comms/mas-deployment.yaml",
"services/comms/oneoffs/bstein-force-leave-job.yaml",
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
"services/comms/oneoffs/mas-db-ensure-job.yaml",
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
"services/comms/oneoffs/synapse-user-seed-job.yaml",
"services/comms/pin-othrys-job.yaml",
"services/comms/reset-othrys-room-job.yaml",
"services/comms/seed-othrys-room.yaml",
"services/comms/vault-sync-deployment.yaml",
"services/comms/wellknown.yaml",
"services/crypto/monerod/deployment.yaml",
"services/crypto/wallet-monero-temp/deployment.yaml",
"services/crypto/xmr-miner/deployment.yaml",
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
"services/finance/firefly-cronjob.yaml",
"services/finance/firefly-deployment.yaml",
"services/finance/firefly-user-sync-cronjob.yaml",
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
"services/gitea/deployment.yaml",
"services/harbor/vault-sync-deployment.yaml",
"services/health/wger-admin-ensure-cronjob.yaml",
"services/health/wger-deployment.yaml",
"services/health/wger-user-sync-cronjob.yaml",
"services/jellyfin/loader.yaml",
"services/jenkins/deployment.yaml",
"services/jenkins/vault-sync-deployment.yaml",
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/ldap-federation-job.yaml",
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/realm-settings-job.yaml",
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
"services/keycloak/oneoffs/user-overrides-job.yaml",
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
"services/keycloak/vault-sync-deployment.yaml",
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/logging/oauth2-proxy.yaml",
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
"services/logging/oneoffs/opensearch-ism-job.yaml",
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
"services/logging/opensearch-prune-cronjob.yaml",
"services/logging/vault-sync-deployment.yaml",
"services/mailu/mailu-sync-cronjob.yaml",
"services/mailu/mailu-sync-listener.yaml",
"services/mailu/oneoffs/mailu-sync-job.yaml",
"services/mailu/vault-sync-deployment.yaml",
"services/mailu/vip-controller.yaml",
"services/maintenance/ariadne-deployment.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/k3s-agent-restart-daemonset.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oauth2-proxy-metis.yaml",
"services/maintenance/oauth2-proxy-soteria.yaml",
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
"services/maintenance/pod-cleaner-cronjob.yaml",
"services/maintenance/soteria-deployment.yaml",
"services/maintenance/vault-sync-deployment.yaml",
"services/monitoring/dcgm-exporter.yaml",
"services/monitoring/jetson-tegrastats-exporter.yaml",
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
"services/monitoring/platform-quality-gateway-deployment.yaml",
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
"services/monitoring/postmark-exporter-deployment.yaml",
"services/monitoring/vault-sync-deployment.yaml",
"services/nextcloud/collabora.yaml",
"services/oauth2-proxy/deployment.yaml",
"services/openldap/statefulset.yaml",
"services/outline/deployment.yaml",
"services/outline/redis-deployment.yaml",
"services/pegasus/vault-sync-deployment.yaml",
"services/quality/oauth2-proxy-sonarqube.yaml",
"services/quality/sonarqube-deployment.yaml",
"services/quality/sonarqube-exporter-deployment.yaml",
"services/sui-metrics/base/deployment.yaml",
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
"services/typhon/deployment.yaml",
"services/typhon/vault-sync-deployment.yaml",
"services/vault/k8s-auth-config-cronjob.yaml",
"services/vault/oidc-config-cronjob.yaml",
"services/vaultwarden/deployment.yaml"
]
},
{
"id": "KSV-0121",
"targets": [
"services/logging/node-image-gc-rpi4-daemonset.yaml",
"services/logging/node-image-prune-rpi5-daemonset.yaml",
"services/logging/node-log-rotation-daemonset.yaml",
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
"services/maintenance/image-sweeper-cronjob.yaml",
"services/maintenance/metis-deployment.yaml",
"services/maintenance/node-image-sweeper-daemonset.yaml",
"services/maintenance/node-nofile-daemonset.yaml",
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
]
}
]
}

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/ariadne
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(bstein-dev-home): automated image update"
push:
branch: main
branch: feature/ariadne
update:
strategy: Setters
path: services/bstein-dev-home

View File

@ -21,7 +21,6 @@ resources:
- sui-metrics/kustomization.yaml
- openldap/kustomization.yaml
- keycloak/kustomization.yaml
- quality/kustomization.yaml
- oauth2-proxy/kustomization.yaml
- mailu/kustomization.yaml
- jenkins/kustomization.yaml

View File

@ -1,35 +0,0 @@
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: quality
namespace: flux-system
spec:
interval: 10m
path: ./services/quality
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: quality
dependsOn:
- name: traefik
- name: cert-manager
- name: keycloak
- name: vault
- name: postgres
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: sonarqube
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: sonarqube-exporter
namespace: quality
- apiVersion: apps/v1
kind: Deployment
name: oauth2-proxy-sonarqube
namespace: quality
wait: false
timeout: 20m

View File

@ -13,14 +13,14 @@ spec:
git:
checkout:
ref:
branch: main
branch: feature/ariadne
commit:
author:
email: ops@bstein.dev
name: flux-bot
messageTemplate: "chore(maintenance): automated image update"
push:
branch: main
branch: feature/ariadne
update:
strategy: Setters
path: services/maintenance

View File

@ -2,8 +2,4 @@ FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary \
&& groupadd --system guest-tools \
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
USER guest-tools
RUN pip install --no-cache-dir requests psycopg2-binary

View File

@ -1,8 +1,16 @@
# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
&& mkdir -p /var/log/data-prepper
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
USER 10001
WORKDIR /usr/share/data-prepper
CMD ["bin/data-prepper"]

View File

@ -1,13 +1,10 @@
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S livekit-token \
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
RUN apk add --no-cache ca-certificates
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER livekit-token
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -29,12 +29,10 @@ FROM ${DEBIAN_IMAGE}
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends ca-certificates; \
update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
groupadd --system p2pool; \
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
update-ca-certificates; rm -rf /var/lib/apt/lists/*
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
RUN /usr/local/bin/p2pool --version || true
EXPOSE 3333
USER p2pool
ENTRYPOINT ["/usr/local/bin/p2pool"]

View File

@ -26,12 +26,9 @@ RUN set -eux; \
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
rm -f /opt/monero/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
rm -f /opt/monero/monero.tar.bz2
ENV PATH="/usr/local/bin:/usr/bin:/bin"
RUN /usr/local/bin/monero-wallet-rpc --version || true
EXPOSE 18083
USER monero

View File

@ -23,14 +23,10 @@ RUN set -eux; \
mkdir -p /opt/monero; \
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
rm -f /tmp/monero.tar.bz2; \
groupadd --system monero; \
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
mkdir -p /data; \
chown monero:monero /data; \
chmod 0770 /data
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
PATH="/opt/monero:${PATH}"
USER monero
CMD ["/opt/monero/monerod", "--version"]

View File

@ -1,13 +1,10 @@
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S oauth2-proxy \
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
RUN apk add --no-cache ca-certificates
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER oauth2-proxy
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -1,13 +1,10 @@
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates \
&& addgroup -S pegasus \
&& adduser -S -D -H -u 65532 -G pegasus pegasus
RUN apk add --no-cache ca-certificates
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER pegasus
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -1,48 +0,0 @@
# dockerfiles/Dockerfile.quality-tools
# Image bundling the sonar-scanner CLI and Trivy on a Debian bookworm-slim base.
# Both downloads below use ARM64/aarch64 artifact names, so the image is
# architecture-specific as written.
FROM debian:bookworm-slim
# Tool versions are build arguments so they can be overridden with --build-arg.
ARG SONAR_SCANNER_VERSION=8.0.1.6346
ARG TRIVY_VERSION=0.70.0
ENV TRIVY_CACHE_DIR=/opt/trivy-cache
# Run subsequent RUN steps under bash with pipefail so a failing command in a
# pipeline fails the step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Base packages plus a dedicated system user/group (uid 65532) with no home
# directory and a nologin shell.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash \
ca-certificates \
curl \
git \
jq \
unzip \
&& rm -rf /var/lib/apt/lists/* \
&& groupadd --system quality-tools \
&& useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
# Download sonar-scanner, verify it against the published .sha256 file, unpack
# it under /opt and expose the binary on PATH through a symlink.
RUN set -eux; \
scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
printf '%s %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
unzip -q "/tmp/${scanner_zip}" -d /opt; \
ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
# Download the Trivy release tarball and extract only the `trivy` binary, then
# run both tools once as a smoke test.
# NOTE(review): unlike sonar-scanner above, this download is not checksum-verified.
RUN set -eux; \
trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
curl -fsSL "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
rm -f "/tmp/${trivy_tgz}"; \
trivy --version; \
sonar-scanner -v
# Download the Trivy database into TRIVY_CACHE_DIR at build time and make it
# world-readable; create /workspace owned by the unprivileged user.
# NOTE(review): presumably this lets scans start without fetching the DB first - confirm.
RUN set -eux; \
mkdir -p "${TRIVY_CACHE_DIR}"; \
trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
mkdir -p /workspace; \
chown quality-tools:quality-tools /workspace
WORKDIR /workspace
# Drop root: containers from this image run as quality-tools by default.
USER quality-tools

View File

@ -33,36 +33,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -76,36 +46,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
@ -119,36 +59,6 @@ spec:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:

View File

@ -26,9 +26,6 @@ spec:
cleanupOnFail: true
timeout: 15m
values:
global:
nodeSelector:
longhorn-host: "true"
service:
ui:
type: NodePort
@ -81,12 +78,3 @@ spec:
tag: v2.16.0
defaultSettings:
systemManagedPodsImagePullPolicy: Always
longhornManager:
nodeSelector:
longhorn-host: "true"
longhornDriver:
nodeSelector:
longhorn-host: "true"
longhornUI:
nodeSelector:
longhorn-host: "true"

View File

@ -2,11 +2,10 @@
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-settings-ensure-7
name: longhorn-settings-ensure-4
namespace: longhorn-system
spec:
backoffLimit: 0
activeDeadlineSeconds: 240
ttlSecondsAfterFinished: 3600
template:
spec:

View File

@ -4,12 +4,11 @@ set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
curl_opts="-fsS --connect-timeout 3 --max-time 15"
wait_for_api() {
attempts=30
while [ "${attempts}" -gt 0 ]; do
if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
if curl -fsS "${api_base}" >/dev/null 2>&1; then
return 0
fi
attempts=$((attempts - 1))
@ -23,14 +22,14 @@ update_setting() {
name="$1"
value="$2"
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
current="$(curl -fsS "${api_base}/${name}" || true)"
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
echo "Setting ${name} already set."
return 0
fi
echo "Setting ${name} -> ${value}"
curl ${curl_opts} -X PUT \
curl -fsS -X PUT \
-H "Content-Type: application/json" \
-d "{\"value\":\"${value}\"}" \
"${api_base}/${name}" >/dev/null
@ -41,7 +40,3 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
update_setting replica-auto-balance "best-effort"
update_setting concurrent-replica-rebuild-per-node-limit "2"
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"

View File

@ -13,27 +13,9 @@ spec:
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/shared/harbor-pull"
secretKey: "dockerconfigjson"
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ACCESS_KEY_ID"
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_SECRET_ACCESS_KEY"
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
secretPath: "kv/data/atlas/longhorn/backup-b2"
secretKey: "AWS_ENDPOINTS"
secretObjects:
- secretName: longhorn-registry
type: kubernetes.io/dockerconfigjson
data:
- objectName: harbor-pull__dockerconfigjson
key: .dockerconfigjson
- secretName: longhorn-backup-b2
type: Opaque
data:
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
key: AWS_ACCESS_KEY_ID
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
key: AWS_SECRET_ACCESS_KEY
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
key: AWS_ENDPOINTS

View File

@ -26,16 +26,6 @@ spec:
- key: hardware
operator: In
values: ["rpi5", "rpi4"]
- weight: 90
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
containers:
- name: sync
image: alpine:3.20

View File

@ -70,38 +70,6 @@ items:
dnsPolicy: ClusterFirst
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi4
restartPolicy: Always
schedulerName: default-scheduler
serviceAccount: atlas-traefik-ingress-controller

View File

@ -41,12 +41,3 @@ spec:
failurePolicy: Ignore
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-13", "titan-15", "titan-17", "titan-19"]

File diff suppressed because it is too large Load Diff

View File

@ -4,21 +4,13 @@ import pathlib
def load_module():
path = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
spec = importlib.util.spec_from_file_location("scripts.dashboards_render_atlas", path)
spec = importlib.util.spec_from_file_location("dashboards_render_atlas", path)
module = importlib.util.module_from_spec(spec)
assert spec.loader is not None
spec.loader.exec_module(module)
return module
def flatten_panels(panels):
flat = []
for panel in panels:
flat.append(panel)
flat.extend(panel.get("panels", []))
return flat
def test_table_panel_options_and_filterable():
mod = load_module()
panel = mod.table_panel(
@ -64,71 +56,3 @@ def test_render_configmap_writes(tmp_path):
content = (tmp_path / "cm.yaml").read_text()
assert "kind: ConfigMap" in content
assert f"{uid}.json" in content
def test_testing_suite_variable_uses_canonical_values_only():
mod = load_module()
variable = mod.testing_suite_variable()
canonical_matcher = "|".join(mod.PLATFORM_TEST_SUITE_NAMES)
legacy_names = {"bstein-home", "data-prepper", "titan-iac", "pegasus-health"}
assert variable["allValue"] == canonical_matcher
assert not any(alias in variable["query"] for alias in legacy_names)
assert not any(alias in variable["allValue"] for alias in legacy_names)
assert [option["value"] for option in variable["options"]] == mod.PLATFORM_TEST_SUITE_NAMES
def test_jobs_dashboard_separates_current_gate_health_from_reliability():
mod = load_module()
dashboard = mod.build_jobs_dashboard()
panels_by_title = {panel["title"]: panel for panel in flatten_panels(dashboard["panels"])}
assert "Current Gate Health by Suite" in panels_by_title
assert "Run Reliability by Suite (24h)" in panels_by_title
assert "Run Reliability History by Suite" in panels_by_title
assert "Failures by Suite (24h)" not in panels_by_title
assert "Success Rate by Suite (24h)" not in panels_by_title
current_gate_expr = panels_by_title["Current Gate Health by Suite"]["targets"][0]["expr"]
assert 'check)' in current_gate_expr
assert 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"' in current_gate_expr
reliability_panel = panels_by_title["Run Reliability by Suite (24h)"]
reliability_expr = reliability_panel["targets"][0]["expr"]
assert "platform_quality_gate_runs_total" in reliability_expr
assert "> 0" in reliability_expr
assert "- 1" in reliability_expr
assert reliability_panel["fieldConfig"]["defaults"]["mappings"] == [
{"type": "value", "options": {"-1": {"text": "no runs"}}}
]
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
mod = load_module()
dashboard = mod.build_jobs_dashboard()
panels = dashboard["panels"]
rows = [panel for panel in panels if panel["type"] == "row"]
visible_query_panels = [panel for panel in panels if panel["type"] != "row"]
nested_panels_by_title = {
child["title"]: child
for row in rows
for child in row.get("panels", [])
}
assert len(panels) == 16
assert len(visible_query_panels) == 11
assert sum(len(panel.get("targets", [])) for panel in visible_query_panels) == 11
assert [row["title"] for row in rows] == [
"Reliability And Run History",
"Failure Trends By Check",
"Success Trends By Check",
"Test Drilldowns And Problem Tests",
"Telemetry Completeness, SonarQube, And Branches",
]
assert all(row["collapsed"] for row in rows)
assert "Failure Trend: Coverage" in nested_panels_by_title
assert "Success Trend: Supply Chain" in nested_panels_by_title
assert "Selected Test Pass Rate History" in nested_panels_by_title
assert "Missing Coverage Metrics by Suite" in nested_panels_by_title
assert "SonarQube API Up" in nested_panels_by_title

View File

@ -138,100 +138,6 @@ def test_kc_get_users_paginates(monkeypatch):
assert sync.SESSION.calls == 1
def test_kc_get_users_fetches_second_page_after_full_batch(monkeypatch):
sync = load_sync_module(monkeypatch)
class _PagedSession:
def __init__(self):
self.calls = 0
self.first_params = []
def get(self, *_, **kwargs):
self.calls += 1
self.first_params.append(kwargs["params"]["first"])
if self.calls == 1:
return _FakeResponse([{"id": f"u{i}"} for i in range(200)])
return _FakeResponse([{"id": "last"}])
sync.SESSION = _PagedSession()
users = sync.kc_get_users("tok")
assert len(users) == 201
assert sync.SESSION.first_params == [0, 200]
def test_get_kc_token_posts_client_credentials(monkeypatch):
sync = load_sync_module(monkeypatch)
calls = []
class _TokenSession:
def post(self, url, data, timeout):
calls.append((url, data, timeout))
return _FakeResponse({"access_token": "tok"})
sync.SESSION = _TokenSession()
assert sync.get_kc_token() == "tok"
assert calls[0][1]["grant_type"] == "client_credentials"
def test_retry_request_retries_then_succeeds(monkeypatch):
sync = load_sync_module(monkeypatch)
attempts = []
sleeps = []
def _flaky():
attempts.append(1)
if len(attempts) == 1:
raise sync.requests.RequestException("temporary")
return "ok"
monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
assert sync.retry_request("request", _flaky, attempts=2) == "ok"
assert sleeps == [2]
def test_retry_request_reraises_final_error(monkeypatch):
sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync.time, "sleep", lambda seconds: None)
with pytest.raises(sync.requests.RequestException):
sync.retry_request(
"request",
lambda: (_ for _ in ()).throw(sync.requests.RequestException("nope")),
attempts=1,
)
def test_retry_db_connect_retries_then_succeeds(monkeypatch):
sync = load_sync_module(monkeypatch)
attempts = []
sleeps = []
def _connect(**kwargs):
attempts.append(kwargs)
if len(attempts) == 1:
raise sync.psycopg2.Error("not yet")
return "conn"
monkeypatch.setattr(sync.psycopg2, "connect", _connect)
monkeypatch.setattr(sync.time, "sleep", lambda seconds: sleeps.append(seconds))
assert sync.retry_db_connect(attempts=2) == "conn"
assert sleeps == [2]
def test_retry_db_connect_reraises_final_error(monkeypatch):
sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync.psycopg2, "connect", lambda **kwargs: (_ for _ in ()).throw(sync.psycopg2.Error("down")))
monkeypatch.setattr(sync.time, "sleep", lambda seconds: None)
with pytest.raises(sync.psycopg2.Error):
sync.retry_db_connect(attempts=1)
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
sync = load_sync_module(monkeypatch)
executed = []
@ -260,87 +166,6 @@ def test_ensure_mailu_user_upserts(monkeypatch):
assert captured["password"] != "pw"
def test_attribute_and_email_helpers(monkeypatch):
sync = load_sync_module(monkeypatch)
assert sync.get_attribute_value({"x": ["first", "second"]}, "x") == "first"
assert sync.get_attribute_value({"x": []}, "x") is None
assert sync.get_attribute_value({"x": "value"}, "x") == "value"
assert sync.mailu_enabled({"mailu_email": ["legacy@example.com"]}) is True
assert sync.mailu_enabled({"mailu_enabled": ["off"]}) is False
assert sync.resolve_mailu_email({"username": "fallback", "email": "user@example.com"}, {}) == "user@example.com"
assert sync.resolve_mailu_email({"username": "fallback", "email": "user@other.com"}, {}) == "fallback@example.com"
def test_safe_update_payload_filters_fields(monkeypatch):
sync = load_sync_module(monkeypatch)
payload = sync._safe_update_payload(
{
"username": "user",
"enabled": True,
"email": "user@example.com",
"emailVerified": False,
"firstName": "User",
"lastName": "Example",
"requiredActions": ["UPDATE_PASSWORD", 7],
"attributes": "not-a-dict",
"ignored": "value",
}
)
assert payload == {
"username": "user",
"enabled": True,
"email": "user@example.com",
"emailVerified": False,
"firstName": "User",
"lastName": "Example",
"requiredActions": ["UPDATE_PASSWORD"],
"attributes": {},
}
def test_ensure_system_mailboxes_handles_configurations(monkeypatch, capsys):
sync = load_sync_module(monkeypatch)
ensured = []
monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", ["postmaster@example.com", "abuse"])
monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "")
sync.ensure_system_mailboxes(object())
assert "MAILU_SYSTEM_PASSWORD is missing" in capsys.readouterr().out
def _ensure(cursor, email, password, display_name):
ensured.append((email, password, display_name))
if email == "abuse":
raise RuntimeError("boom")
monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "pw")
monkeypatch.setattr(sync, "ensure_mailu_user", _ensure)
sync.ensure_system_mailboxes(object())
out = capsys.readouterr().out
assert ensured == [
("postmaster@example.com", "pw", "postmaster"),
("abuse", "pw", "abuse"),
]
assert "Ensured system mailbox for postmaster@example.com" in out
assert "Failed to ensure system mailbox abuse" in out
def test_main_exits_without_users_or_system_mailboxes(monkeypatch, capsys):
sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", [])
monkeypatch.setattr(sync, "get_kc_token", lambda: "tok")
monkeypatch.setattr(sync, "kc_get_users", lambda token: [])
sync.main()
assert "No users found; exiting." in capsys.readouterr().out
def test_main_generates_password_and_upserts(monkeypatch):
sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")

View File

@ -1,134 +0,0 @@
import importlib.util
import io
import pathlib
import types
def load_listener_module(monkeypatch):
    """Load a fresh copy of ``mailu_sync_listener.py`` for one test.

    The wait timeout env var is forced to ``"0"`` before the module body runs.
    The script is located relative to this file (``parents[2]`` joined with
    ``services/mailu/scripts``) and executed under a private module name, so
    every call returns an independent module object with its own globals.
    """
    monkeypatch.setenv("MAILU_SYNC_WAIT_TIMEOUT_SEC", "0")
    base_dir = pathlib.Path(__file__).resolve().parents[2]
    script_path = base_dir.joinpath("services", "mailu", "scripts", "mailu_sync_listener.py")
    module_spec = importlib.util.spec_from_file_location("mailu_sync_listener_testmod", script_path)
    listener_module = importlib.util.module_from_spec(module_spec)
    assert module_spec.loader is not None
    module_spec.loader.exec_module(listener_module)
    return listener_module
def _handler_for(listener, body):
handler = listener.Handler.__new__(listener.Handler)
raw = body if isinstance(body, bytes) else body.encode()
handler.headers = {"Content-Length": str(len(raw))}
handler.rfile = io.BytesIO(raw)
handler.responses = []
handler.headers_ended = 0
handler.send_response = lambda code: handler.responses.append(code)
handler.end_headers = lambda: setattr(handler, "headers_ended", handler.headers_ended + 1)
return handler
def test_listener_run_sync_blocking_updates_state(monkeypatch):
    """A blocking sync records its return code and timestamp; a second call
    made while ``sync_running`` is set returns 0."""
    listener = load_listener_module(monkeypatch)

    def _fake_run(command, check):
        # Stand-in for subprocess.run that reports exit status 3.
        return types.SimpleNamespace(returncode=3)

    monkeypatch.setattr(listener, "time", lambda: 42.0)
    monkeypatch.setattr(listener.subprocess, "run", _fake_run)

    rc = listener._run_sync_blocking()
    assert rc == 3
    assert listener.last_rc == 3
    assert listener.last_run == 42.0
    assert listener.sync_done.is_set()

    listener.sync_running = True
    assert listener._run_sync_blocking() == 0
def test_listener_trigger_sync_async_honors_running_and_debounce(monkeypatch):
    """``_trigger_sync_async`` declines while a sync is running or when the
    last run was 5 seconds ago, and starts a daemon thread when forced."""
    listener = load_listener_module(monkeypatch)
    started = []

    class _RecordingThread:
        """Thread double that records (target, daemon) instead of running."""

        def __init__(self, target, daemon):
            self.target = target
            self.daemon = daemon

        def start(self):
            started.append((self.target, self.daemon))

    monkeypatch.setattr(listener.threading, "Thread", _RecordingThread)
    monkeypatch.setattr(listener, "time", lambda: 100.0)

    # A sync already in flight blocks a new one.
    listener.sync_running = True
    assert listener._trigger_sync_async() is False

    # Idle, but the previous run was only 5 seconds before "now" (100.0).
    listener.sync_running = False
    listener.last_run = 95.0
    assert listener._trigger_sync_async() is False

    # force=True starts a thread anyway.
    assert listener._trigger_sync_async(force=True) is True
    assert started and started[0][1] is True
def test_listener_post_rejects_invalid_json(monkeypatch):
    """A body that is not valid JSON yields a single 400 response."""
    listener = load_listener_module(monkeypatch)
    request = _handler_for(listener, b"{not-json")

    request.do_POST()

    assert request.responses == [400]
    assert request.headers_ended == 1
def test_listener_post_triggers_async_without_wait(monkeypatch):
    """Without ``wait``, POST forwards ``force`` to the trigger and returns 202."""
    listener = load_listener_module(monkeypatch)
    seen_force = []

    def _fake_trigger(force=False):
        seen_force.append(force)
        return True

    monkeypatch.setattr(listener, "_trigger_sync_async", _fake_trigger)
    request = _handler_for(listener, '{"force": true}')

    request.do_POST()

    assert seen_force == [True]
    assert request.responses == [202]
def test_listener_post_wait_returns_success_or_failure(monkeypatch):
    """With ``wait``, the response is 200 when ``last_rc`` is 0 and 500 when
    it is 2."""
    listener = load_listener_module(monkeypatch)
    seen_force = []

    def _fake_trigger(force=False):
        seen_force.append(force)
        return True

    monkeypatch.setattr(listener, "_trigger_sync_async", _fake_trigger)

    # Last sync succeeded -> 200.
    listener.sync_running = False
    listener.last_rc = 0
    ok_request = _handler_for(listener, '{"wait": true, "force": true}')
    ok_request.do_POST()
    assert seen_force == [True]
    assert ok_request.responses == [200]

    # Last sync exited non-zero -> 500.
    listener.last_rc = 2
    failed_request = _handler_for(listener, '{"wait": true}')
    failed_request.do_POST()
    assert failed_request.responses == [500]
def test_listener_post_wait_keeps_running_request_successful(monkeypatch):
    """A waiting POST issued while ``sync_running`` is set answers 200."""
    listener = load_listener_module(monkeypatch)
    listener.sync_running = True
    request = _handler_for(listener, '{"wait": true}')

    request.do_POST()

    assert request.responses == [200]
def test_listener_log_message_is_quiet(monkeypatch):
    """``Handler.log_message`` returns ``None`` when called with a format
    string and an argument."""
    listener = load_listener_module(monkeypatch)
    bare_handler = listener.Handler.__new__(listener.Handler)
    outcome = bare_handler.log_message("ignored %s", "value")
    assert outcome is None

View File

@ -1,73 +0,0 @@
#!/usr/bin/env bash
# Post-rollout verification for Ariadne's Jenkins workspace cleanup.
#
# Usage: <this script> [dry-run|active]      (default: dry-run)
#
# Steps:
#   1. reconcile the Flux kustomization
#   2. wait for the deployment rollout
#   3. check the cleanup env vars on the deployment's first container
#   4. scrape /metrics through a temporary port-forward and require the
#      cleanup metric families to be exported
#   5. print any run-counter samples for the selected mode plus recent logs
#
# Overridable via environment: KUSTOMIZATION, NAMESPACE, DEPLOYMENT,
# LOCAL_METRICS_PORT.
# Exit status: 2 for usage/tooling problems, non-zero for a failed check.
set -euo pipefail

MODE="${1:-dry-run}"
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
  echo "usage: $0 [dry-run|active]" >&2
  exit 2
fi

# dry-run expects DRY_RUN=true and the "dry_run" metric label; active expects
# DRY_RUN=false and the "delete" label.
EXPECTED_DRY_RUN="true"
PROM_MODE="dry_run"
if [[ "$MODE" == "active" ]]; then
  EXPECTED_DRY_RUN="false"
  PROM_MODE="delete"
fi

KUSTOMIZATION="${KUSTOMIZATION:-maintenance}"
NAMESPACE="${NAMESPACE:-maintenance}"
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"

# Only tools this script actually invokes are required.
for cmd in flux kubectl curl grep mktemp; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "missing required command: $cmd" >&2
    exit 2
  fi
done

echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source

echo "[2/5] wait for deployment rollout"
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m

echo "[3/5] verify ariadne env wiring"
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"

# Require an exact NAME=VALUE line in the env dump. Whole-line matching (-x)
# prevents e.g. "=24" from being satisfied by "=240"; the matched line is
# echoed on success and a clear error is printed on failure.
require_env() {
  local expected="$1"
  if ! grep -Fx -- "$expected" <<<"$ENV_DUMP"; then
    echo "deployment/${DEPLOYMENT} is missing expected env: ${expected}" >&2
    exit 1
  fi
}

require_env "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
require_env "JENKINS_WORKSPACE_NAMESPACE=jenkins"
require_env "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
require_env "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
require_env "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
require_env "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"

echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
PF_LOG="$(mktemp)"
METRICS_FILE="$(mktemp)"

# Stop the background port-forward (if it was started) and remove temp files.
cleanup() {
  if [[ -n "${PF_PID:-}" ]]; then
    kill "$PF_PID" >/dev/null 2>&1 || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  rm -f "$PF_LOG" "$METRICS_FILE"
}
trap cleanup EXIT

kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" "${LOCAL_METRICS_PORT}:8080" >"$PF_LOG" 2>&1 &
PF_PID=$!

# Poll for the forwarded port instead of sleeping a fixed interval: the
# port-forward may need more (or less) than a couple of seconds to come up.
# Give up early if the port-forward process itself has exited.
SCRAPED="false"
for ((attempt = 1; attempt <= 15; attempt++)); do
  if curl -fsS "http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics" >"$METRICS_FILE" 2>/dev/null; then
    SCRAPED="true"
    break
  fi
  if ! kill -0 "$PF_PID" 2>/dev/null; then
    break
  fi
  sleep 1
done
if [[ "$SCRAPED" != "true" ]]; then
  echo "failed to scrape /metrics through port-forward; port-forward output:" >&2
  cat "$PF_LOG" >&2 || true
  exit 1
fi

for metric in ariadne_jenkins_workspace_cleanup_runs_total ariadne_jenkins_workspace_cleanup_objects_total; do
  if ! grep -F "# HELP ${metric}" "$METRICS_FILE"; then
    echo "metric family not exported: ${metric}" >&2
    exit 1
  fi
done

echo "[5/5] show recent cleanup signal"
# Look at real sample lines (name followed by a label set), not the HELP/TYPE
# comments that step 4 already proved exist; otherwise the "no sample yet"
# hint below could never be printed.
RUN_SAMPLES="$(grep -E '^ariadne_jenkins_workspace_cleanup_runs_total\{' "$METRICS_FILE" | grep -F "mode=\"${PROM_MODE}\"" || true)"
if [[ -n "$RUN_SAMPLES" ]]; then
  echo "$RUN_SAMPLES"
else
  echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
fi

echo "Recent cleanup logs (if any):"
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 | grep -i "jenkins workspace cleanup" | tail -n 20 || true

echo "verification complete for mode=${MODE}"

View File

@ -5,7 +5,7 @@ metadata:
name: ollama
namespace: ai
spec:
replicas: 0
replicas: 1
revisionHistoryLimit: 2
strategy:
type: RollingUpdate
@ -21,7 +21,7 @@ spec:
app: ollama
annotations:
ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
ai.bstein.dev/gpu: GPU pool (titan-20/21)
ai.bstein.dev/gpu: GPU pool (titan-22/24)
ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
spec:
affinity:
@ -32,13 +32,13 @@ spec:
- key: kubernetes.io/hostname
operator: In
values:
- titan-20
- titan-21
- titan-22
- titan-24
runtimeClassName: nvidia
volumes:
- name: models
persistentVolumeClaim:
claimName: ollama-models-asteria
claimName: ollama-models
initContainers:
- name: warm-model
image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d

View File

@ -2,12 +2,12 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: ollama-models-asteria
name: ollama-models
namespace: ai
spec:
accessModes:
- ReadWriteMany
- ReadWriteOnce
resources:
requests:
storage: 30Gi
storageClassName: asteria
storageClassName: astreae

View File

@ -49,15 +49,6 @@ spec:
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-13", "titan-15", "titan-17", "titan-19"]
imagePullSecrets:
- name: harbor-regcred
containers:

View File

@ -38,36 +38,6 @@ spec:
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
containers:
- name: gateway
image: python:3.11-slim

View File

@ -26,7 +26,7 @@ spec:
imagePullPolicy: Always
ports:
- name: http
containerPort: 8080
containerPort: 80
readinessProbe:
httpGet:
path: /

View File

@ -10,4 +10,4 @@ spec:
ports:
- name: http
port: 80
targetPort: 8080
targetPort: 80

View File

@ -15,14 +15,13 @@ resources:
- frontend-service.yaml
- backend-deployment.yaml
- backend-service.yaml
- vaultwarden-cred-sync-cronjob.yaml
- oneoffs/portal-onboarding-e2e-test-job.yaml
- ingress.yaml
images:
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
newTag: 0.1.1-120 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend:tag"}
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
newTag: 0.1.1-267 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
newTag: 0.1.1-123 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend:tag"}
configMapGenerator:
- name: chat-ai-gateway
namespace: bstein-dev-home
@ -30,12 +29,6 @@ configMapGenerator:
- gateway.py=scripts/gateway.py
options:
disableNameSuffixHash: true
- name: vaultwarden-cred-sync-script
namespace: bstein-dev-home
files:
- vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
options:
disableNameSuffixHash: true
- name: portal-onboarding-e2e-tests
namespace: bstein-dev-home
files:

View File

@ -1,245 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import sys
import time
from datetime import datetime, timezone
from typing import Any, Iterable
import httpx
from atlas_portal import settings
from atlas_portal.keycloak import admin_client
from atlas_portal.vaultwarden import invite_user
# Keycloak user-attribute names this script reads and writes.
VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
# Seconds a user whose last status was "rate_limited" or "error" is skipped
# before being retried (compared against vaultwarden_synced_at in main()).
VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800"))
# Number of consecutive "rate_limited"/"error" invite results after which
# main() aborts the run.
VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2"))
def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
    """Yield every user dict from the Keycloak admin users endpoint.

    Users are fetched page by page (``first``/``max``) with
    ``briefRepresentation=false``. Each page request is tried up to five
    times, sleeping ``attempt * 2`` seconds between tries; the fifth
    ``httpx.HTTPError`` is re-raised. Iteration stops on an empty or non-list
    payload, or after a page shorter than ``page_size``.

    Raises:
        RuntimeError: if the Keycloak admin client reports it is not ready.
    """
    keycloak = admin_client()
    if not keycloak.ready():
        raise RuntimeError("keycloak admin client not configured")

    users_url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    max_tries = 5
    offset = 0
    while True:
        auth_headers = _headers_with_retry(keycloak)
        # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
        # brief representation which may omit these.
        query = {"first": str(offset), "max": str(page_size), "briefRepresentation": "false"}
        page = None
        for attempt in range(1, max_tries + 1):
            try:
                with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
                    response = http.get(users_url, params=query, headers=auth_headers)
                    response.raise_for_status()
                    page = response.json()
            except httpx.HTTPError:
                if attempt == max_tries:
                    raise
                time.sleep(attempt * 2)
            else:
                break

        if not isinstance(page, list) or not page:
            return

        yield from (entry for entry in page if isinstance(entry, dict))

        if len(page) < page_size:
            return
        offset += page_size
def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]:
last_exc: Exception | None = None
for attempt in range(1, attempts + 1):
try:
return client.headers()
except Exception as exc:
last_exc = exc
time.sleep(attempt * 2)
if last_exc:
raise last_exc
raise RuntimeError("failed to fetch keycloak headers")
def _extract_attr(attrs: Any, key: str) -> str:
if not isinstance(attrs, dict):
return ""
raw = attrs.get(key)
if isinstance(raw, list):
for item in raw:
if isinstance(item, str) and item.strip():
return item.strip()
return ""
if isinstance(raw, str) and raw.strip():
return raw.strip()
return ""
def _parse_synced_at(value: str) -> float | None:
value = (value or "").strip()
if not value:
return None
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
try:
parsed = datetime.strptime(value, fmt)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.timestamp()
except ValueError:
continue
return None
def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
    """Pick the address to invite to Vaultwarden for a Keycloak user.

    Order of preference: the ``vaultwarden_email`` attribute, then the
    ``mailu_email`` attribute, then the account ``email`` if it ends with
    ``@<settings.MAILU_DOMAIN>`` (case-insensitive). Returns ``""`` when the
    user has no username or none of these yields an address.
    """
    raw_username = user.get("username")
    if not isinstance(raw_username, str) or not raw_username.strip():
        return ""

    attributes = user.get("attributes")
    for attr_name in (VAULTWARDEN_EMAIL_ATTR, "mailu_email"):
        stored = _extract_attr(attributes, attr_name)
        if stored:
            return stored

    raw_email = user.get("email")
    address = raw_email.strip() if isinstance(raw_email, str) else ""
    if address and address.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
        return address

    # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
    # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
    return ""
def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
    """Write ``key=value`` for ``username`` only if ``user`` has no value yet.

    Nothing is written when ``value`` is empty/blank or when ``user``'s
    attributes already hold a non-blank value for ``key``. The value is
    stripped before being stored.
    """
    cleaned = (value or "").strip()
    if cleaned and not _extract_attr(user.get("attributes"), key):
        admin_client().set_user_attribute(username, key, cleaned)
def _set_user_attribute(username: str, key: str, value: str) -> None:
    """Store the stripped ``value`` under ``key`` for ``username``.

    Empty or blank values are ignored, so this never clears an attribute.
    """
    cleaned = (value or "").strip()
    if cleaned:
        admin_client().set_user_attribute(username, key, cleaned)
def _utc_timestamp() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())


def _record_sync_state(username: str, status: str | None = None) -> None:
    """Best-effort write of the sync status (optional) and sync timestamp.

    Failures are reported on stderr instead of being raised, so a Keycloak
    write problem never aborts the run but is no longer invisible.
    """
    try:
        if status is not None:
            _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, status)
        _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, _utc_timestamp())
    except Exception as exc:
        print(f"warn {username}: failed to record sync state: {exc}", file=sys.stderr)


def main() -> int:
    """Invite eligible Keycloak users to Vaultwarden.

    Skips users without a username, disabled users, service accounts, users
    inside the retry cooldown after a ``rate_limited``/``error`` result,
    users without a resolvable email, and users already ``invited`` or
    ``already_present``. The run is aborted after
    ``VAULTWARDEN_FAILURE_BAILOUT`` consecutive ``rate_limited``/``error``
    results.

    Returns:
        0 when no invite failed, 2 otherwise.
    """
    processed = 0
    created = 0
    skipped = 0
    failures = 0
    consecutive_failures = 0

    for user in _iter_keycloak_users():
        raw_username = user.get("username")
        username = raw_username.strip() if isinstance(raw_username, str) else ""
        if not username:
            skipped += 1
            continue
        if user.get("enabled") is False:
            skipped += 1
            continue
        if user.get("serviceAccountClientId") or username.startswith("service-account-"):
            skipped += 1
            continue

        # Fetch the full user payload so we can reliably read attributes (and skip re-invites).
        raw_id = user.get("id")
        user_id = raw_id.strip() if isinstance(raw_id, str) else ""
        full_user = user
        if user_id:
            try:
                fetched = admin_client().get_user(user_id)
            except Exception as exc:
                # Fall back to the list payload, but say so.
                print(f"warn {username}: failed to fetch full user: {exc}", file=sys.stderr)
            else:
                # Guard against a non-dict reply; keep the list payload then.
                if isinstance(fetched, dict):
                    full_user = fetched

        attributes = full_user.get("attributes")
        current_status = _extract_attr(attributes, VAULTWARDEN_STATUS_ATTR)
        current_synced_at = _extract_attr(attributes, VAULTWARDEN_SYNCED_AT_ATTR)
        current_synced_ts = _parse_synced_at(current_synced_at)

        # Respect the cooldown after a failed attempt. Compare against None
        # explicitly so a parsed timestamp of 0.0 is not treated as missing.
        if current_status in {"rate_limited", "error"} and current_synced_ts is not None:
            if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC:
                skipped += 1
                continue

        email = _vaultwarden_email_for_user(full_user)
        if not email:
            print(f"skip {username}: missing email", file=sys.stderr)
            skipped += 1
            continue

        try:
            _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
        except Exception as exc:
            print(f"warn {username}: failed to store {VAULTWARDEN_EMAIL_ATTR}: {exc}", file=sys.stderr)

        # If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
        # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
        if current_status in {"invited", "already_present"}:
            if not current_synced_at:
                # Backfill the timestamp only; the status is already set.
                _record_sync_state(username)
            skipped += 1
            continue

        processed += 1
        result = invite_user(email)
        if result.ok:
            created += 1
            consecutive_failures = 0
            print(f"ok {username}: {result.status}")
            _record_sync_state(username, result.status)
        else:
            failures += 1
            if result.status in {"rate_limited", "error"}:
                consecutive_failures += 1
            print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
            _record_sync_state(username, result.status)
            if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT:
                print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr)
                break

    print(
        f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
        file=sys.stderr,
    )
    return 0 if failures == 0 else 2
if __name__ == "__main__":
raise SystemExit(main())

View File

@ -1,86 +0,0 @@
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
# CronJob that runs /scripts/vaultwarden_cred_sync.py (mounted from the
# vaultwarden-cred-sync-script ConfigMap) on a 15-minute schedule.
# The job is currently suspended and never retried (backoffLimit: 0).
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vaultwarden-cred-sync
  namespace: bstein-dev-home
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/15 * * * *"
  suspend: true
  # Never start a new run while the previous one is still active.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            # Vault agent renders portal-env.sh once before the container
            # starts (pre-populate only); the container sources it below.
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "bstein-dev-home"
            vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
            vault.hashicorp.com/agent-inject-template-portal-env.sh: |
              {{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
              export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
              {{ end }}
              {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
              export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
              export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
              export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
              export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
              export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
        spec:
          serviceAccountName: bstein-dev-home
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          imagePullSecrets:
            - name: harbor-regcred
          containers:
            - name: sync
              image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95
              imagePullPolicy: Always
              command: ["/bin/sh", "-c"]
              # Load the Vault-rendered exports, then replace the shell with
              # the sync script so its exit code becomes the container's.
              args:
                - >-
                  . /vault/secrets/portal-env.sh
                  && exec python /scripts/vaultwarden_cred_sync.py
              env:
                - name: PYTHONPATH
                  value: /app
                - name: KEYCLOAK_ENABLED
                  value: "true"
                - name: KEYCLOAK_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_URL
                  value: http://keycloak.sso.svc.cluster.local
                - name: KEYCLOAK_ADMIN_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_CLIENT_ID
                  value: bstein-dev-home-admin
                - name: HTTP_CHECK_TIMEOUT_SEC
                  value: "20"
                - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
                  value: "900"
                - name: VAULTWARDEN_RETRY_COOLDOWN_SEC
                  value: "1800"
                - name: VAULTWARDEN_FAILURE_BAILOUT
                  value: "2"
              volumeMounts:
                - name: vaultwarden-cred-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: vaultwarden-cred-sync-script
              configMap:
                name: vaultwarden-cred-sync-script
                # Octal notation (r-xr-xr-x); relies on YAML 1.1 octal parsing.
                defaultMode: 0555

View File

@ -1,471 +0,0 @@
# services/comms/guest-name-job.yaml
# CronJob "guest-name-randomizer": scheduled every minute but currently
# suspended; overlapping runs are forbidden and a failed run is not retried
# (backoffLimit: 0). Vault agent annotations render the listed secrets before
# the container starts; the container sources /vault/scripts/comms_vault_env.sh
# and then runs the inline Python program that follows the heredoc marker.
apiVersion: batch/v1
kind: CronJob
metadata:
name: guest-name-randomizer
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/1 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
nodeSelector:
hardware: rpi5
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555
containers:
- name: rename
image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: MAS_ADMIN_CLIENT_ID
value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM
- name: MAS_ADMIN_CLIENT_SECRET_FILE
value: /vault/secrets/mas-admin-secret
- name: MAS_ADMIN_API_BASE
value: http://matrix-authentication-service:8081/api/admin/v1
- name: MAS_TOKEN_URL
value: http://matrix-authentication-service:8080/oauth2/token
- name: SEEDER_USER
value: othrys-seeder
- name: PGHOST
value: postgres-service.postgres.svc.cluster.local
- name: PGPORT
value: "5432"
- name: PGDATABASE
value: synapse
- name: PGUSER
value: synapse
# PGPASSWORD is read by the inline script but is not set in env above;
# presumably comms_vault_env.sh exports it — verify against that ConfigMap.
# NOTE(review): `set -o pipefail` is not accepted by every /bin/sh; confirm
# the image's shell supports it.
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
python - <<'PY'
import base64
import os
import random
import requests
import time
import urllib.parse
import psycopg2
# Word lists combined as "<adjective>-<noun>" to build random display names.
# Order is kept stable because random.choice() indexes into these lists.
ADJ = [
    "brisk", "calm", "eager", "gentle", "merry",
    "nifty", "rapid", "sunny", "witty", "zesty",
    "amber", "bold", "bright", "crisp", "daring",
    "frosty", "glad", "jolly", "lively", "mellow",
    "quiet", "ripe", "serene", "spry", "tidy",
    "vivid", "warm", "wild", "clever", "kind",
]
NOUN = [
    "otter", "falcon", "comet", "ember", "grove",
    "harbor", "meadow", "raven", "river", "summit",
    "breeze", "cedar", "cinder", "cove", "delta",
    "forest", "glade", "lark", "marsh", "peak",
    "pine", "quartz", "reef", "ridge", "sable",
    "sage", "shore", "thunder", "vale", "zephyr",
]

# Endpoints and credentials come from the container environment; a missing
# variable fails fast with KeyError.
BASE = os.environ["SYNAPSE_BASE"]
MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"]
MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"]
MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/")
MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"]
SEEDER_USER = os.environ["SEEDER_USER"]

ROOM_ALIAS = "#othrys:live.bstein.dev"
SERVER_NAME = "live.bstein.dev"
# Guests whose last_seen_ts is older than this (14 days, in ms) are pruned.
STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000
def mas_admin_token():
    """Obtain a MAS admin access token via the client-credentials grant.

    Reads the client secret from MAS_ADMIN_CLIENT_SECRET_FILE and posts to
    MAS_TOKEN_URL with HTTP Basic auth, requesting the ``urn:mas:admin`` scope.
    Up to five attempts are made with exponential backoff (1, 2, 4, 8 s)
    between them.

    Returns:
        The ``access_token`` string from the token response.

    Raises:
        The exception from the last failed attempt.
    """
    with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f:
        secret = f.read().strip()
    basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode()
    attempts = 5
    last_err = None
    for attempt in range(attempts):
        try:
            r = requests.post(
                MAS_TOKEN_URL,
                headers={"Authorization": f"Basic {basic}"},
                data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
                timeout=30,
            )
            r.raise_for_status()
            return r.json()["access_token"]
        except Exception as exc:  # noqa: BLE001
            last_err = exc
            # Only back off when another attempt will follow; sleeping after
            # the final failure just delays the error by 16 seconds.
            if attempt < attempts - 1:
                time.sleep(2 ** attempt)
    raise last_err
def mas_user_id(token, username):
    """Look up a MAS user by username and return the ``id`` of the returned resource.

    Raises requests.HTTPError when the admin API answers with an error status.
    """
    lookup_url = f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}"
    response = requests.get(
        lookup_url,
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    return body["data"]["id"]
def mas_personal_session(token, user_id):
    """Create a 300-second MAS personal session acting as ``user_id``.

    Returns:
        A ``(access_token, session_id)`` tuple taken from the response's
        ``data.attributes.access_token`` and ``data.id``.

    Raises:
        requests.HTTPError on an error status; KeyError if the response
        carries no access token.
    """
    session_request = {
        "actor_user_id": user_id,
        "human_name": "guest-name-randomizer",
        "scope": "urn:matrix:client:api:*",
        "expires_in": 300,
    }
    response = requests.post(
        f"{MAS_ADMIN_API_BASE}/personal-sessions",
        headers={"Authorization": f"Bearer {token}"},
        json=session_request,
        timeout=30,
    )
    response.raise_for_status()
    body = response.json()
    attributes = body.get("data", {}).get("attributes", {}) or {}
    return attributes["access_token"], body["data"]["id"]
def mas_revoke_session(token, session_id):
    """Ask MAS to revoke a personal session; the HTTP response is not inspected."""
    revoke_url = (
        f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke"
    )
    bearer = {"Authorization": f"Bearer {token}"}
    requests.post(revoke_url, headers=bearer, json={}, timeout=30)
def resolve_alias(token, alias):
    """Resolve a room alias to its room ID via the client directory API.

    Raises requests.HTTPError on an error status (including an unknown alias).
    """
    headers = {"Authorization": f"Bearer {token}"}
    enc = urllib.parse.quote(alias)
    # Bounded like the other helpers in this script: without a timeout a
    # stalled connection would hang the job indefinitely.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/directory/room/{enc}",
        headers=headers,
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["room_id"]
def room_members(token, room_id):
    """Fetch the membership events of a room.

    Returns:
        ``(members, existing_names)``: the set of ``state_key`` user IDs found
        in the response chunk, and the set of non-empty display names carried
        by those events.

    Raises:
        requests.HTTPError on an error status.
    """
    headers = {"Authorization": f"Bearer {token}"}
    # Bounded like the other helpers in this script so a stalled connection
    # cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members",
        headers=headers,
        timeout=30,
    )
    r.raise_for_status()
    members = set()
    existing_names = set()
    for ev in r.json().get("chunk", []):
        user_id = ev.get("state_key")
        if user_id:
            members.add(user_id)
        disp = (ev.get("content") or {}).get("displayname")
        if disp:
            existing_names.add(disp)
    return members, existing_names
def mas_list_users(token):
    """Return every user resource from the MAS admin API, 100 per page.

    Paging continues from the ``meta.page.cursor`` of the last item on each
    page and stops on an empty page or a missing cursor.
    """
    bearer = {"Authorization": f"Bearer {token}"}
    collected = []
    after = None
    while True:
        page_url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100"
        if after:
            page_url += f"&page[after]={urllib.parse.quote(after)}"
        response = requests.get(page_url, headers=bearer, timeout=30)
        response.raise_for_status()
        page = response.json().get("data", [])
        if not page:
            return collected
        collected.extend(page)
        after = page[-1].get("meta", {}).get("page", {}).get("cursor")
        if not after:
            return collected
def synapse_list_users(token):
    """Return all local, non-deactivated users from the Synapse admin API.

    Pages of 100 are fetched until the response no longer carries a
    ``next_token``.
    """
    bearer = {"Authorization": f"Bearer {token}"}
    collected = []
    next_from = None
    while True:
        page_url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
        if next_from:
            page_url += f"&from={urllib.parse.quote(next_from)}"
        response = requests.get(page_url, headers=bearer, timeout=30)
        response.raise_for_status()
        body = response.json()
        collected.extend(body.get("users", []))
        next_from = body.get("next_token")
        if not next_from:
            return collected
def should_prune_guest(entry, now_ms):
    """Decide whether a Synapse user entry is a guest idle for too long.

    Returns True only when ``entry`` is flagged ``is_guest``, has an
    integer-convertible ``last_seen_ts``, and that timestamp is more than
    STALE_GUEST_MS milliseconds before ``now_ms``.
    """
    if not entry.get("is_guest"):
        return False
    raw_seen = entry.get("last_seen_ts")
    if raw_seen is None:
        # Never-seen guests are kept.
        return False
    try:
        seen_ms = int(raw_seen)
    except (TypeError, ValueError):
        return False
    idle_ms = now_ms - seen_ms
    return idle_ms > STALE_GUEST_MS
def prune_guest(token, user_id):
    """Issue an erase-delete for ``user_id`` against the Synapse admin API.

    Returns True when the server answers 200, 202, 204 or 404 (so an already
    missing user counts as pruned). Any request exception or other status is
    printed and reported as False; nothing is raised.

    NOTE(review): Synapse's documented deactivation endpoint is
    POST /_synapse/admin/v1/deactivate/<user_id>; confirm the deployed
    version actually serves DELETE on /_synapse/admin/v2/users/<user_id>.
    """
    bearer = {"Authorization": f"Bearer {token}"}
    try:
        response = requests.delete(
            f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
            headers=bearer,
            params={"erase": "true"},
            timeout=30,
        )
    except Exception as exc:  # noqa: BLE001
        print(f"guest prune failed for {user_id}: {exc}")
        return False
    if response.status_code not in (200, 202, 204, 404):
        print(f"guest prune failed for {user_id}: {response.status_code} {response.text}")
        return False
    return True
def user_id_for_username(username):
    """Build the full Matrix user ID (``@localpart:server``) for a local username.

    Uses the script-wide SERVER_NAME instead of a second hard-coded copy of
    the homeserver name, so the two cannot drift apart.
    """
    return f"@{username}:{SERVER_NAME}"
def get_displayname(token, user_id):
    """Return the profile display name of ``user_id`` (None when unset).

    Raises requests.HTTPError on an error status.
    """
    headers = {"Authorization": f"Bearer {token}"}
    # Bounded like the admin-API helpers so a stalled connection cannot hang
    # the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}",
        headers=headers,
        timeout=30,
    )
    r.raise_for_status()
    return r.json().get("displayname")
def get_displayname_admin(token, user_id):
    """Read a user's display name through the Synapse admin user endpoint.

    Returns None when the user does not exist (404) or has no display name;
    raises requests.HTTPError on any other error status.
    """
    admin_url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
    response = requests.get(
        admin_url,
        headers={"Authorization": f"Bearer {token}"},
        timeout=30,
    )
    if response.status_code != 404:
        response.raise_for_status()
        return response.json().get("displayname")
    return None
def set_displayname(token, room_id, user_id, name, in_room):
    """Set a user's profile display name and, if joined, their room member name.

    Args:
        token: access token acting as ``user_id``.
        room_id: room whose ``m.room.member`` state is updated when ``in_room``.
        user_id: full Matrix user ID being renamed.
        name: new display name.
        in_room: whether the user is a member of ``room_id``.

    Raises:
        requests.HTTPError if the profile update is rejected. The room state
        update is best-effort: its response is not checked.
    """
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"displayname": name}
    # Same 30 s bound as the member-state PUT below; previously this call
    # could block forever on a stalled connection.
    r = requests.put(
        f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname",
        headers=headers,
        json=payload,
        timeout=30,
    )
    r.raise_for_status()
    if not in_room:
        return
    state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}"
    content = {"membership": "join", "displayname": name}
    requests.put(state_url, headers=headers, json=content, timeout=30)
def set_displayname_admin(token, user_id, name):
    """Set a display name through the Synapse admin user endpoint.

    Returns True when the server answers 200, 201 or 204, otherwise False.
    """
    response = requests.put(
        f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
        headers={"Authorization": f"Bearer {token}"},
        json={"displayname": name},
        timeout=30,
    )
    return response.status_code in (200, 201, 204)
def needs_rename_username(username):
    """Return True for all-digit usernames or ones starting with ``guest-``."""
    if username.isdigit():
        return True
    return username.startswith("guest-")
def needs_rename_display(display):
    """Return True when a display name is empty, all digits, or starts with ``guest-``."""
    if not display:
        return True
    return display.isdigit() or display.startswith("guest-")
def db_rename_numeric(existing_names):
    """Give numeric-localpart users a random display name directly in the database.

    Connects with the PG* environment variables and, in one transaction:
      1. updates the ``profiles`` rows of numeric users whose display name
         still needs renaming;
      2. inserts (or upserts) a profile for numeric users from ``users`` that
         have no profile row yet.

    ``existing_names`` is mutated: every name handed out is added to it so the
    same name is not reused. A user is skipped when no unused name is found
    within 30 random draws.
    """
    numeric_mxid = f"^@\\d+:{SERVER_NAME}$"

    def draw_unused_name():
        # Up to 30 random "<adj>-<noun>" draws; None when all were taken.
        for _ in range(30):
            candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
            if candidate not in existing_names:
                existing_names.add(candidate)
                return candidate
        return None

    conn = psycopg2.connect(
        host=os.environ["PGHOST"],
        port=int(os.environ["PGPORT"]),
        dbname=os.environ["PGDATABASE"],
        user=os.environ["PGUSER"],
        password=os.environ["PGPASSWORD"],
    )
    try:
        # `with conn` commits on normal exit (including the early return
        # below) and rolls back on an exception.
        with conn, conn.cursor() as cur:
            cur.execute(
                "SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
                (numeric_mxid,),
            )
            numeric_profiles = cur.fetchall()
            known_profiles = {row[1]: row for row in numeric_profiles}
            for _local, full_user_id, display in numeric_profiles:
                if display and not needs_rename_display(display):
                    continue
                fresh = draw_unused_name()
                if not fresh:
                    continue
                cur.execute(
                    "UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
                    (fresh, full_user_id),
                )

            cur.execute(
                "SELECT name FROM users WHERE name ~ %s",
                (numeric_mxid,),
            )
            numeric_users = [row[0] for row in cur.fetchall()]
            if not numeric_users:
                return
            cur.execute(
                "SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
                (numeric_users,),
            )
            for profile_row in cur.fetchall():
                known_profiles.setdefault(profile_row[1], profile_row)

            # Only users without any profile row get one created here.
            for full_user_id in numeric_users:
                if full_user_id in known_profiles:
                    continue
                localpart = full_user_id.split(":", 1)[0].lstrip("@")
                fresh = draw_unused_name()
                if not fresh:
                    continue
                cur.execute(
                    "INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
                    "ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
                    (localpart, fresh, full_user_id),
                )
    finally:
        conn.close()
# --- Script body -----------------------------------------------------------
# 1. Rename MAS users that look like guests (legacy_guest flag, numeric or
#    "guest-" username), acting as each user through a short-lived session.
# 2. For Synapse-only users: prune stale guests, rename the remaining
#    guest-like accounts through the admin API.
# 3. Patch numeric-localpart profiles directly in the database.
# The seeder's personal session is always revoked at the end.
admin_token = mas_admin_token()
seeder_id = mas_user_id(admin_token, SEEDER_USER)
seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id)


def _draw_unused_name(taken):
    """Return an unused "<adj>-<noun>" name (recording it in ``taken``) or None after 30 draws."""
    for _ in range(30):
        candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
        if candidate not in taken:
            taken.add(candidate)
            return candidate
    return None


try:
    room_id = resolve_alias(seeder_token, ROOM_ALIAS)
    members, existing = room_members(seeder_token, room_id)

    mas_usernames = set()
    for user in mas_list_users(admin_token):
        attrs = user.get("attributes") or {}
        username = attrs.get("username") or ""
        if not username:
            continue
        mas_usernames.add(username)
        if not (attrs.get("legacy_guest") or needs_rename_username(username)):
            continue
        user_id = user_id_for_username(username)
        access_token, session_id = mas_personal_session(admin_token, user["id"])
        try:
            display = get_displayname(access_token, user_id)
            if display and not needs_rename_display(display):
                continue
            new = _draw_unused_name(existing)
            if not new:
                continue
            set_displayname(access_token, room_id, user_id, new, user_id in members)
        finally:
            # Runs on every path, including the `continue`s above.
            mas_revoke_session(admin_token, session_id)

    try:
        entries = synapse_list_users(seeder_token)
    except Exception as exc:  # noqa: BLE001
        print(f"synapse admin list skipped: {exc}")
        entries = []

    now_ms = int(time.time() * 1000)
    for entry in entries:
        user_id = entry.get("name") or ""
        if not user_id.startswith("@"):
            continue
        localpart = user_id.split(":", 1)[0].lstrip("@")
        if localpart in mas_usernames:
            # Already handled through MAS above.
            continue
        is_guest = entry.get("is_guest")
        # A stale guest that was pruned successfully needs no rename; if the
        # prune failed the account falls through to the rename path.
        if is_guest and should_prune_guest(entry, now_ms) and prune_guest(seeder_token, user_id):
            continue
        if not (is_guest or needs_rename_username(localpart)):
            continue
        display = get_displayname_admin(seeder_token, user_id)
        if display and not needs_rename_display(display):
            continue
        new = _draw_unused_name(existing)
        if not new:
            continue
        # Best-effort: a rejected rename is simply skipped.
        set_displayname_admin(seeder_token, user_id, new)

    db_rename_numeric(existing)
finally:
    mas_revoke_session(admin_token, seeder_session)
PY

View File

@ -34,11 +34,7 @@ resources:
- livekit-token-deployment.yaml
- livekit.yaml
- coturn.yaml
- seed-othrys-room.yaml
- guest-name-job.yaml
- oneoffs/othrys-kick-numeric-job.yaml
- pin-othrys-job.yaml
- reset-othrys-room-job.yaml
- oneoffs/bstein-force-leave-job.yaml
- livekit-ingress.yaml
- livekit-middlewares.yaml

View File

@ -1,169 +0,0 @@
# services/comms/pin-othrys-job.yaml
# CronJob "pin-othrys-invite": scheduled every 30 minutes but currently
# suspended; overlapping runs are forbidden and a failed run is not retried
# (backoffLimit: 0). The container sources /vault/scripts/comms_vault_env.sh,
# installs `requests` with pip at start-up and runs the inline Python program
# that follows the heredoc marker.
apiVersion: batch/v1
kind: CronJob
metadata:
name: pin-othrys-invite
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/30 * * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: pin
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
# SEEDER_PASS is read by the inline script but is not set in env above;
# presumably comms_vault_env.sh exports it — verify against that ConfigMap.
# NOTE(review): `set -o pipefail` is not accepted by every /bin/sh; confirm
# the shell in python:3.11-slim supports it.
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os, requests, urllib.parse
# Synapse base URL (required); login goes to AUTH_BASE, defaulting to BASE.
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)

ROOM_ALIAS = "#othrys:live.bstein.dev"

# Body of the pinned guest-invite message; compared verbatim against the
# bodies of already pinned events to detect an existing pin.
_INVITE_LINK_PART = "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join "
_INVITE_HOWTO_PART = "and choose 'Continue' -> 'Join as guest'."
MESSAGE = _INVITE_LINK_PART + _INVITE_HOWTO_PART
def auth(token):
    """Build the bearer Authorization header for ``token``."""
    bearer_value = f"Bearer {token}"
    return {"Authorization": bearer_value}
def canon_user(user):
    """Normalise a username to a full Matrix ID.

    A value already shaped like ``@local:server`` is returned (stripped) as
    is; otherwise leading ``@`` characters are dropped, one ``@`` is
    prepended, and ``:live.bstein.dev`` is appended when no server part is
    present. ``None`` is treated as an empty string.
    """
    cleaned = (user or "").strip()
    if ":" in cleaned and cleaned.startswith("@"):
        return cleaned
    bare = cleaned.lstrip("@")
    suffix = "" if ":" in bare else ":live.bstein.dev"
    return f"@{bare}{suffix}"
def login(user, password):
    """Password-login against AUTH_BASE and return the access token.

    Raises requests.HTTPError when the login is rejected.
    """
    # Bounded request: without a timeout a stalled connection would hang the
    # job and, with concurrencyPolicy Forbid, block every later run.
    r = requests.post(
        f"{AUTH_BASE}/_matrix/client/v3/login",
        json={
            "type": "m.login.password",
            "identifier": {"type": "m.id.user", "user": canon_user(user)},
            "password": password,
        },
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["access_token"]
def resolve(alias, token):
    """Resolve a room alias to its room ID; raises requests.HTTPError on failure."""
    enc = urllib.parse.quote(alias)
    # Bounded so a stalled connection cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/directory/room/{enc}",
        headers=auth(token),
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["room_id"]
def get_pinned(room_id, token):
    """Return the room's pinned event IDs.

    A 404 (no ``m.room.pinned_events`` state) yields an empty list; any other
    error status raises requests.HTTPError.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
        headers=auth(token),
        timeout=30,
    )
    if r.status_code == 404:
        return []
    r.raise_for_status()
    return r.json().get("pinned", [])
def get_event(room_id, event_id, token):
    """Fetch a single event from a room.

    Returns the decoded event, or None when the server answers 404; any other
    error status raises requests.HTTPError.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
        headers=auth(token),
        timeout=30,
    )
    if r.status_code == 404:
        return None
    r.raise_for_status()
    return r.json()
def send(room_id, token, body):
    """Post a plain-text ``m.room.message`` to the room and return its event ID.

    Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.post(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
        headers=auth(token),
        json={"msgtype": "m.text", "body": body},
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["event_id"]
def pin(room_id, token, event_id):
    """Set the room's pinned events to exactly ``[event_id]``.

    This replaces the whole ``m.room.pinned_events`` state, so any previously
    pinned events are dropped. Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.put(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
        headers=auth(token),
        json={"pinned": [event_id]},
        timeout=30,
    )
    r.raise_for_status()
# Script body: log in as the seeder, and pin the invite message unless an
# event with exactly that body is already pinned.
token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
room_id = resolve(ROOM_ALIAS, token)

invite_already_pinned = any(
    (get_event(room_id, pinned_id, token) or {}).get("content", {}).get("body") == MESSAGE
    for pinned_id in get_pinned(room_id, token)
)
if invite_already_pinned:
    # Nothing to do; exit successfully.
    raise SystemExit(0)

pin(room_id, token, send(room_id, token, MESSAGE))
PY
volumeMounts:
# The comms-vault-env ConfigMap is mounted read-only at /vault/scripts; the
# container command sources comms_vault_env.sh from that directory.
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
# Octal notation (r-xr-xr-x); relies on YAML 1.1 octal parsing.
defaultMode: 0555

View File

@ -1,312 +0,0 @@
# services/comms/reset-othrys-room-job.yaml
# CronJob "othrys-room-reset": the schedule only fires at 00:00 on 1 January
# and the job is suspended, so it does not run by itself. Overlapping runs are
# forbidden and a failed run is not retried (backoffLimit: 0). The container
# sources /vault/scripts/comms_vault_env.sh, installs `requests` with pip and
# runs the inline Python program that follows the heredoc marker.
apiVersion: batch/v1
kind: CronJob
metadata:
name: othrys-room-reset
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 0 1 1 *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 1
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: reset
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SERVER_NAME
value: live.bstein.dev
- name: ROOM_ALIAS
value: "#othrys:live.bstein.dev"
- name: ROOM_NAME
value: Othrys
- name: PIN_MESSAGE
value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'."
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
# SEEDER_PASS is read by the inline script but is not set in env above;
# presumably comms_vault_env.sh exports it — verify against that ConfigMap.
# NOTE(review): `set -o pipefail` is not accepted by every /bin/sh; confirm
# the shell in python:3.11-slim supports it.
command:
- /bin/sh
- -c
- |
set -euo pipefail
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests >/dev/null
python - <<'PY'
import os
import time
import urllib.parse
import requests
# Endpoints and room identity; optional values fall back to the live.bstein.dev
# defaults, required ones fail fast with KeyError.
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev")
ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev")
ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys")
PIN_MESSAGE = os.environ["PIN_MESSAGE"]
SEEDER_USER = os.environ["SEEDER_USER"]
SEEDER_PASS = os.environ["SEEDER_PASS"]
BOT_USER = os.environ["BOT_USER"]

# The two non-zero power levels used below.
_LEVEL_50 = 50
_LEVEL_100 = 100

# Content of the m.room.power_levels state applied to the new room: the
# seeder is the only user with level 100, everyone else defaults to 0.
POWER_LEVELS = {
    "ban": _LEVEL_50,
    "events": {
        "m.room.avatar": _LEVEL_50,
        "m.room.canonical_alias": _LEVEL_50,
        "m.room.encryption": _LEVEL_100,
        "m.room.history_visibility": _LEVEL_100,
        "m.room.name": _LEVEL_50,
        "m.room.power_levels": _LEVEL_100,
        "m.room.server_acl": _LEVEL_100,
        "m.room.tombstone": _LEVEL_100,
    },
    "events_default": 0,
    "historical": _LEVEL_100,
    "invite": _LEVEL_50,
    "kick": _LEVEL_50,
    "m.call.invite": _LEVEL_50,
    "redact": _LEVEL_50,
    "state_default": _LEVEL_50,
    "users": {f"@{SEEDER_USER}:{SERVER_NAME}": _LEVEL_100},
    "users_default": 0,
}
def auth(token):
    """Build the bearer Authorization header for ``token``."""
    bearer_value = f"Bearer {token}"
    return {"Authorization": bearer_value}
def canon_user(user):
    """Normalise a username to a full Matrix ID.

    A value already shaped like ``@local:server`` is returned (stripped) as
    is; otherwise leading ``@`` characters are dropped, one ``@`` is
    prepended, and ``:SERVER_NAME`` is appended when no server part is
    present. ``None`` is treated as an empty string.
    """
    cleaned = (user or "").strip()
    if ":" in cleaned and cleaned.startswith("@"):
        return cleaned
    bare = cleaned.lstrip("@")
    if ":" in bare:
        return f"@{bare}"
    return f"@{bare}:{SERVER_NAME}"
def login(user, password):
    """Password-login against AUTH_BASE and return the access token.

    Raises:
        SystemExit: with the status code and response text when the server
            does not answer 200.
    """
    # Bounded request: without a timeout a stalled connection would hang the
    # job indefinitely.
    r = requests.post(
        f"{AUTH_BASE}/_matrix/client/v3/login",
        json={
            "type": "m.login.password",
            "identifier": {"type": "m.id.user", "user": canon_user(user)},
            "password": password,
        },
        timeout=30,
    )
    if r.status_code != 200:
        raise SystemExit(f"login failed: {r.status_code} {r.text}")
    return r.json()["access_token"]
def resolve_alias(token, alias):
    """Resolve a room alias to its room ID.

    Returns None when the alias does not exist (404); any other error status
    raises requests.HTTPError.
    """
    enc = urllib.parse.quote(alias)
    # Bounded so a stalled connection cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/directory/room/{enc}",
        headers=auth(token),
        timeout=30,
    )
    if r.status_code == 404:
        return None
    r.raise_for_status()
    return r.json()["room_id"]
def create_room(token):
    """Create a public-chat room named ROOM_NAME (room version 11) and return its ID.

    Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.post(
        f"{BASE}/_matrix/client/v3/createRoom",
        headers=auth(token),
        json={
            "preset": "public_chat",
            "name": ROOM_NAME,
            "room_version": "11",
        },
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["room_id"]
def put_state(token, room_id, ev_type, content):
    """PUT a state event of type ``ev_type`` (empty state key) into the room.

    Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.put(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
        headers=auth(token),
        json=content,
        timeout=30,
    )
    r.raise_for_status()
def set_directory_visibility(token, room_id, visibility):
    """Set the room's visibility in the room directory (e.g. "public"/"private").

    Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.put(
        f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
        headers=auth(token),
        json={"visibility": visibility},
        timeout=30,
    )
    r.raise_for_status()
def delete_alias(token, alias):
    """Remove a room alias from the directory.

    200, 202 and 404 (alias already gone) count as success; other error
    statuses raise requests.HTTPError.
    """
    enc = urllib.parse.quote(alias)
    # Bounded so a stalled connection cannot hang the job.
    r = requests.delete(
        f"{BASE}/_matrix/client/v3/directory/room/{enc}",
        headers=auth(token),
        timeout=30,
    )
    if r.status_code in (200, 202, 404):
        return
    r.raise_for_status()
def put_alias(token, alias, room_id):
    """Point a room alias at ``room_id``; raises requests.HTTPError on failure."""
    enc = urllib.parse.quote(alias)
    # Bounded so a stalled connection cannot hang the job.
    r = requests.put(
        f"{BASE}/_matrix/client/v3/directory/room/{enc}",
        headers=auth(token),
        json={"room_id": room_id},
        timeout=30,
    )
    r.raise_for_status()
def list_joined_members(token, room_id):
    """Return the user IDs of the room's joined members.

    Only ``m.room.member`` events whose ``state_key`` is a string starting
    with ``@`` are kept. Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.get(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
        headers=auth(token),
        timeout=30,
    )
    r.raise_for_status()
    members = []
    for ev in r.json().get("chunk", []):
        if ev.get("type") != "m.room.member":
            continue
        uid = ev.get("state_key")
        if not isinstance(uid, str) or not uid.startswith("@"):
            continue
        members.append(uid)
    return members
def invite_user(token, room_id, user_id):
    """Invite ``user_id`` to the room.

    200 and 202 count as success; error statuses raise requests.HTTPError.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.post(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
        headers=auth(token),
        json={"user_id": user_id},
        timeout=30,
    )
    if r.status_code in (200, 202):
        return
    r.raise_for_status()
def send_message(token, room_id, body):
    """Post a plain-text ``m.room.message`` to the room and return its event ID.

    Raises requests.HTTPError on an error status.
    """
    # Bounded so a stalled connection cannot hang the job.
    r = requests.post(
        f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
        headers=auth(token),
        json={"msgtype": "m.text", "body": body},
        timeout=30,
    )
    r.raise_for_status()
    return r.json()["event_id"]
def login_with_retry():
    """Log in as the seeder account, retrying failed attempts.

    Makes up to five calls to ``login(SEEDER_USER, SEEDER_PASS)`` and returns
    the first successful result. Between consecutive attempts it pauses 2, 4, 6
    and 8 seconds. If every attempt fails, the exception from the last attempt
    is re-raised.
    """
    max_attempts = 5
    last = None
    for attempt in range(1, max_attempts + 1):
        try:
            return login(SEEDER_USER, SEEDER_PASS)
        except Exception as exc:  # noqa: BLE001
            # NOTE(review): only Exception subclasses are retried; if login()
            # reports failure with SystemExit it propagates at once - confirm.
            last = exc
            if attempt < max_attempts:
                # Back off only when another attempt follows; sleeping after
                # the final failure would merely delay the error.
                time.sleep(attempt * 2)
    raise last
# Main flow: replace the room behind ROOM_ALIAS with a freshly created one,
# move the alias and invitations over, then close down the old room.
token = login_with_retry()
old_room_id = resolve_alias(token, ROOM_ALIAS)
# Stop before creating anything when the alias does not resolve.
if not old_room_id:
raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed")
new_room_id = create_room(token)
# Configure the new room.
put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"})
put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS)
# Move the alias.
# The alias is deleted first and then re-created pointing at the new room, so
# it is briefly unassigned between these two calls.
delete_alias(token, ROOM_ALIAS)
put_alias(token, ROOM_ALIAS, new_room_id)
put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS})
set_directory_visibility(token, new_room_id, "public")
# Invite the bot and all joined members of the old room.
bot_user_id = f"@{BOT_USER}:{SERVER_NAME}"
invite_user(token, new_room_id, bot_user_id)
for uid in list_joined_members(token, old_room_id):
# The seeder account itself is not invited.
if uid == f"@{SEEDER_USER}:{SERVER_NAME}":
continue
# Users whose localpart is all digits are skipped
# (presumably guest accounts - TODO confirm).
localpart = uid.split(":", 1)[0].lstrip("@")
if localpart.isdigit():
continue
invite_user(token, new_room_id, uid)
# Pin the guest invite message in the new room.
event_id = send_message(token, new_room_id, PIN_MESSAGE)
put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})
# De-list and tombstone the old room.
# The old room becomes private in the directory, invite-only and closed to
# guests before the tombstone pointing at the new room is written.
set_directory_visibility(token, old_room_id, "private")
put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
put_state(
token,
old_room_id,
"m.room.tombstone",
{"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id},
)
# Leave a final notice in the old room; the join link is a fixed URL and is
# not derived from ROOM_ALIAS.
send_message(
token,
old_room_id,
"Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
)
# Print both room IDs so the job log records the migration.
print(f"old_room_id={old_room_id}")
print(f"new_room_id={new_room_id}")
PY
volumeMounts:
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555

View File

@ -1,185 +0,0 @@
# services/comms/seed-othrys-room.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: seed-othrys-room
namespace: comms
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "*/10 * * * *"
suspend: true
concurrencyPolicy: Forbid
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "comms"
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
vault.hashicorp.com/agent-inject-template-turn-secret: |
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
vault.hashicorp.com/agent-inject-template-livekit-primary: |
{{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-bot-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
vault.hashicorp.com/agent-inject-template-seeder-pass: |
{{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-matrix: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
vault.hashicorp.com/agent-inject-template-chat-homepage: |
{{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
{{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
{{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
vault.hashicorp.com/agent-inject-template-mas-db-pass: |
{{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
{{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
spec:
restartPolicy: Never
serviceAccountName: comms-vault
containers:
- name: seed
image: python:3.11-slim
env:
- name: SYNAPSE_BASE
value: http://othrys-synapse-matrix-synapse:8008
- name: AUTH_BASE
value: http://matrix-authentication-service:8080
- name: SEEDER_USER
value: othrys-seeder
- name: BOT_USER
value: atlasbot
command:
- /bin/sh
- -c
- |
# This script runs under /bin/sh, which is dash on Debian-based images and
# rejects `set -o pipefail`. No pipeline is used here, so -e and -u suffice.
set -eu
# Load the environment exported by the mounted vault helper script.
. /vault/scripts/comms_vault_env.sh
pip install --no-cache-dir requests pyyaml >/dev/null
python - <<'PY'
import os, requests, urllib.parse
BASE = os.environ["SYNAPSE_BASE"]
AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
def canon_user(user):
    """Normalise ``user`` into a full ``@localpart:server`` user ID.

    Surrounding whitespace is stripped and ``None`` is treated as an empty
    string. A value that already starts with ``@`` and contains ``:`` is
    returned unchanged. Otherwise leading ``@`` characters are dropped, one
    ``@`` is prefixed, and ``:live.bstein.dev`` is appended when the value has
    no ``:``.
    """
    cleaned = (user or "").strip()
    has_server = ":" in cleaned
    if has_server and cleaned.startswith("@"):
        return cleaned
    bare = cleaned.lstrip("@")
    return f"@{bare}" if has_server else f"@{bare}:live.bstein.dev"
def login(user, password):
    """Password-login ``user`` against ``AUTH_BASE`` and return the access token.

    The user name is normalised with ``canon_user`` first. Any reply other
    than HTTP 200 ends the script with ``SystemExit`` carrying the status code
    and response text.
    """
    credentials = {
        "type": "m.login.password",
        "identifier": {"type": "m.id.user", "user": canon_user(user)},
        "password": password,
    }
    resp = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json=credentials)
    if resp.status_code == 200:
        return resp.json()["access_token"]
    raise SystemExit(f"login failed: {resp.status_code} {resp.text}")
def ensure_user(token, localpart, password, admin):
    """Create ``@localpart:live.bstein.dev`` via the admin users endpoint if absent.

    An HTTP 200 on the lookup means the account exists; nothing is changed in
    that case, so the password and admin flag of an existing account are left
    alone. Otherwise a PUT creates the account, and any reply other than
    200/201 ends the script with ``SystemExit``.
    """
    user_id = f"@{localpart}:live.bstein.dev"
    endpoint = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
    bearer = {"Authorization": f"Bearer {token}"}
    if requests.get(endpoint, headers=bearer).status_code == 200:
        return
    created = requests.put(
        endpoint,
        headers=bearer,
        json={"password": password, "admin": admin, "deactivated": False},
    )
    if created.status_code in (200, 201):
        return
    raise SystemExit(f"create user {user_id} failed: {created.status_code} {created.text}")
def ensure_room(token):
    """Return the room ID behind ``#othrys:live.bstein.dev``, creating the room if needed.

    The alias is looked up first. When the lookup does not return HTTP 200, a
    ``public_chat`` room named "Othrys" with alias localpart ``othrys`` is
    created and the alias is resolved again. The join rules, guest access,
    history visibility and canonical alias are then (re)applied and the room
    is listed publicly in the directory.

    Raises ``SystemExit`` when room creation answers with anything other than
    200/409, or when the alias still cannot be resolved afterwards. Failures
    while applying state or directory visibility are best-effort: they are
    reported on stdout and do not abort the run.
    """
    headers = {"Authorization": f"Bearer {token}"}
    alias = "#othrys:live.bstein.dev"
    alias_enc = "%23othrys%3Alive.bstein.dev"
    alias_url = f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}"

    exists = requests.get(alias_url, headers=headers)
    if exists.status_code != 200:
        create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={
            "preset": "public_chat",
            "name": "Othrys",
            "room_alias_name": "othrys",
            "initial_state": [],
            "power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50},
        })
        if create.status_code not in (200, 409):
            raise SystemExit(f"create room failed: {create.status_code} {create.text}")
        exists = requests.get(alias_url, headers=headers)
        if exists.status_code != 200:
            # Fail with a readable message instead of a KeyError on "room_id".
            raise SystemExit(f"resolve alias {alias} failed: {exists.status_code} {exists.text}")
    room_id = exists.json()["room_id"]

    # Room IDs contain "!" and ":"; encode them for use in a URL path, the same
    # way join_user() does.
    room_path = urllib.parse.quote(room_id)
    state_events = [
        ("m.room.join_rules", {"join_rule": "public"}),
        ("m.room.guest_access", {"guest_access": "can_join"}),
        ("m.room.history_visibility", {"history_visibility": "shared"}),
        ("m.room.canonical_alias", {"alias": alias}),
    ]
    for ev_type, content in state_events:
        res = requests.put(
            f"{BASE}/_matrix/client/v3/rooms/{room_path}/state/{ev_type}",
            headers=headers,
            json=content,
        )
        if not res.ok:
            # Still best-effort, but no longer silent.
            print(f"warning: set {ev_type} on {room_id} failed: {res.status_code} {res.text}")
    listed = requests.put(
        f"{BASE}/_matrix/client/v3/directory/list/room/{room_path}",
        headers=headers,
        json={"visibility": "public"},
    )
    if not listed.ok:
        print(f"warning: publish {room_id} to directory failed: {listed.status_code} {listed.text}")
    return room_id
def join_user(token, room_id, user_id):
    """Ask the admin join endpoint to add ``user_id`` to ``room_id``.

    The response is not inspected, so a failed join is not reported.
    """
    endpoint = f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}"
    bearer = {"Authorization": f"Bearer {token}"}
    requests.post(endpoint, headers=bearer, json={"user_id": user_id})
def join_all_locals(token, room_id):
    """Join every local, non-deactivated user to ``room_id``.

    Pages through the admin users listing 100 entries at a time, following
    ``next_token`` until it is absent, and collects each entry's ``name``.
    Only after the whole listing has been read is ``join_user`` called for
    every collected name.
    """
    bearer = {"Authorization": f"Bearer {token}"}
    listing = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
    names = []
    cursor = None
    while True:
        page_url = f"{listing}&from={cursor}" if cursor else listing
        page = requests.get(page_url, headers=bearer).json()
        names += [entry["name"] for entry in page.get("users", [])]
        cursor = page.get("next_token")
        if not cursor:
            break
    for name in names:
        join_user(token, room_id, name)
# Main flow: log in as the seeder, make sure the seeder and bot accounts and
# the room exist, then join the bot and every local user to the room.
seeder_user = os.environ["SEEDER_USER"]
seeder_pass = os.environ["SEEDER_PASS"]
token = login(seeder_user, seeder_pass)
ensure_user(token, seeder_user, seeder_pass, admin=True)
# The bot settings are read only after the seeder steps, as before.
bot_user = os.environ["BOT_USER"]
ensure_user(token, bot_user, os.environ["BOT_PASS"], admin=False)
room_id = ensure_room(token)
join_user(token, room_id, f"@{bot_user}:live.bstein.dev")
join_all_locals(token, room_id)
PY
volumeMounts:
- name: synapse-config
mountPath: /config
readOnly: true
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
volumes:
- name: synapse-config
secret:
secretName: othrys-synapse-matrix-synapse
- name: vault-scripts
configMap:
name: comms-vault-env
defaultMode: 0555

View File

@ -1,56 +0,0 @@
# services/finance/firefly-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-cron
namespace: finance
spec:
schedule: "0 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-cron-token: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.STATIC_CRON_TOKEN }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: cron
image: curlimages/curl:8.5.0
command: ["/bin/sh", "-c"]
args:
- |
set -eu
token="$(cat /vault/secrets/firefly-cron-token)"
curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}"

View File

@ -1,92 +0,0 @@
# services/finance/firefly-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: firefly-user-sync
namespace: finance
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 6 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "finance"
vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-env.sh: |
{{ with secret "kv/data/atlas/finance/firefly-db" }}
export DB_CONNECTION="pgsql"
export DB_HOST="{{ .Data.data.DB_HOST }}"
export DB_PORT="{{ .Data.data.DB_PORT }}"
export DB_DATABASE="{{ .Data.data.DB_DATABASE }}"
export DB_USERNAME="{{ .Data.data.DB_USERNAME }}"
export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/finance/firefly-secrets" }}
export APP_KEY="$(cat /vault/secrets/firefly-app-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db"
vault.hashicorp.com/agent-inject-template-firefly-db-password: |
{{- with secret "kv/data/atlas/finance/firefly-db" -}}
{{ .Data.data.DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets"
vault.hashicorp.com/agent-inject-template-firefly-app-key: |
{{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
{{ .Data.data.APP_KEY }}
{{- end -}}
spec:
serviceAccountName: finance-vault
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: fireflyiii/core:version-6.4.15
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/firefly-env.sh
exec php /scripts/firefly_user_sync.php
env:
- name: APP_ENV
value: production
- name: APP_DEBUG
value: "false"
- name: TZ
value: Etc/UTC
volumeMounts:
- name: firefly-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: firefly-user-sync-script
configMap:
name: firefly-user-sync-script
defaultMode: 0555

View File

@ -12,8 +12,6 @@ resources:
- oneoffs/finance-secrets-ensure-job.yaml
- actual-budget-deployment.yaml
- firefly-deployment.yaml
- firefly-user-sync-cronjob.yaml
- firefly-cronjob.yaml
- actual-budget-service.yaml
- firefly-service.yaml
- actual-budget-ingress.yaml
@ -24,9 +22,6 @@ configMapGenerator:
- name: actual-openid-bootstrap-script
files:
- actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs
- name: firefly-user-sync-script
files:
- firefly_user_sync.php=scripts/firefly_user_sync.php
- name: finance-secrets-ensure-script
files:
- finance_secrets_ensure.py=scripts/finance_secrets_ensure.py

View File

@ -1,114 +0,0 @@
#!/usr/bin/env php
<?php
declare(strict_types=1);
use FireflyIII\Console\Commands\Correction\CreatesGroupMemberships;
use FireflyIII\Models\Role;
use FireflyIII\Repositories\User\UserRepositoryInterface;
use FireflyIII\Support\Facades\FireflyConfig;
use FireflyIII\User;
use Illuminate\Contracts\Console\Kernel as ConsoleKernel;
/**
 * Write one line to standard output, terminated with PHP_EOL.
 */
function log_line(string $message): void
{
    $line = $message . PHP_EOL;
    fwrite(STDOUT, $line);
}
/**
 * Write one line to standard error, terminated with PHP_EOL.
 */
function error_line(string $message): void
{
    $line = $message . PHP_EOL;
    fwrite(STDERR, $line);
}
/**
 * Locate the application directory.
 *
 * Tries the FIREFLY_APP_DIR environment variable first (when non-empty), then
 * /var/www/html, /var/www/firefly-iii and /app, and returns the first existing
 * directory that contains vendor/autoload.php.
 *
 * @return string The matching directory, or an empty string when none qualifies.
 */
function find_app_root(): string
{
    $configured = getenv('FIREFLY_APP_DIR') ?: '';
    $search = array_merge(
        $configured !== '' ? [$configured] : [],
        ['/var/www/html', '/var/www/firefly-iii', '/app']
    );
    foreach ($search as $dir) {
        if (is_dir($dir) && file_exists($dir . '/vendor/autoload.php')) {
            return $dir;
        }
    }
    return '';
}
// Main flow: ensure the user named by FIREFLY_USER_EMAIL exists, is unblocked
// and has the password from FIREFLY_USER_PASSWORD.
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
$password = (string) getenv('FIREFLY_USER_PASSWORD');
// Both values are required; exit with status 1 when either is empty.
if ($email === '' || $password === '') {
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
exit(1);
}
$root = find_app_root();
if ($root === '') {
error_line('firefly app root not found');
exit(1);
}
$autoload = $root . '/vendor/autoload.php';
$app_bootstrap = $root . '/bootstrap/app.php';
// find_app_root() only checks for the autoloader, so bootstrap/app.php is
// verified here as well.
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
error_line('firefly bootstrap files missing');
exit(1);
}
// Load the application and bootstrap its console kernel before touching models.
require $autoload;
$app = require $app_bootstrap;
$kernel = $app->make(ConsoleKernel::class);
$kernel->bootstrap();
// Enabling single_user_mode is best-effort: a failure is written to stderr
// and the script carries on.
try {
FireflyConfig::set('single_user_mode', true);
} catch (Throwable $exc) {
error_line('failed to enforce single_user_mode: '.$exc->getMessage());
}
$repository = $app->make(UserRepositoryInterface::class);
$existing_user = User::where('email', $email)->first();
// Recorded before any user is created, so it is true only when the user table
// was empty at this point.
$first_user = User::count() == 0;
if (!$existing_user) {
$existing_user = User::create(
[
'email' => $email,
'password' => bcrypt($password),
'blocked' => false,
'blocked_code' => null,
]
);
// Only the very first user is given the 'owner' role, and only when that
// role exists.
if ($first_user) {
$role = Role::where('name', 'owner')->first();
if ($role) {
$existing_user->roles()->attach($role);
}
}
log_line(sprintf('created firefly user %s', $email));
} else {
log_line(sprintf('updating firefly user %s', $email));
}
// Applied to both new and existing users: clear any block, then set the password.
$existing_user->blocked = false;
$existing_user->blocked_code = null;
$existing_user->save();
// NOTE(review): changePassword() presumably hashes and stores $password - confirm
// against the repository implementation.
$repository->changePassword($existing_user, $password);
CreatesGroupMemberships::createGroupMembership($existing_user);
log_line('firefly user sync complete');

View File

@ -53,7 +53,7 @@ spec:
registry:
existingClaim: harbor-registry
accessMode: ReadWriteOnce
size: 100Gi
size: 50Gi
jobservice:
jobLog:
existingClaim: harbor-jobservice-logs
@ -77,7 +77,6 @@ spec:
internal:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
image:
repository: registry.bstein.dev/infra/harbor-redis
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-redis:tag"}
@ -114,7 +113,6 @@ spec:
core:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
image:
repository: registry.bstein.dev/infra/harbor-core
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-core:tag"}
@ -127,10 +125,6 @@ spec:
podAnnotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "harbor"
vault.hashicorp.com/agent-requests-cpu: "25m"
vault.hashicorp.com/agent-limits-cpu: "100m"
vault.hashicorp.com/agent-requests-mem: "32Mi"
vault.hashicorp.com/agent-limits-mem: "128Mi"
vault.hashicorp.com/agent-inject-secret-harbor-core-env.sh: "kv/data/atlas/harbor/harbor-core"
vault.hashicorp.com/agent-inject-template-harbor-core-env.sh: |
{{ with secret "kv/data/atlas/harbor/harbor-core" }}
@ -180,7 +174,6 @@ spec:
jobservice:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
image:
repository: registry.bstein.dev/infra/harbor-jobservice
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-jobservice:tag"}
@ -190,10 +183,6 @@ spec:
podAnnotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "harbor"
vault.hashicorp.com/agent-requests-cpu: "25m"
vault.hashicorp.com/agent-limits-cpu: "100m"
vault.hashicorp.com/agent-requests-mem: "32Mi"
vault.hashicorp.com/agent-limits-mem: "128Mi"
vault.hashicorp.com/agent-inject-secret-harbor-jobservice-env.sh: "kv/data/atlas/harbor/harbor-jobservice"
vault.hashicorp.com/agent-inject-template-harbor-jobservice-env.sh: |
{{ with secret "kv/data/atlas/harbor/harbor-core" }}
@ -227,7 +216,6 @@ spec:
portal:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
image:
repository: registry.bstein.dev/infra/harbor-portal
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-portal:tag"}
@ -255,7 +243,6 @@ spec:
registry:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
registry:
image:
repository: registry.bstein.dev/infra/harbor-registry
@ -283,10 +270,6 @@ spec:
podAnnotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "harbor"
vault.hashicorp.com/agent-requests-cpu: "25m"
vault.hashicorp.com/agent-limits-cpu: "100m"
vault.hashicorp.com/agent-requests-mem: "32Mi"
vault.hashicorp.com/agent-limits-mem: "128Mi"
vault.hashicorp.com/agent-inject-secret-harbor-registry-env.sh: "kv/data/atlas/harbor/harbor-registry"
vault.hashicorp.com/agent-inject-template-harbor-registry-env.sh: |
{{ with secret "kv/data/atlas/harbor/harbor-registry" }}
@ -338,7 +321,6 @@ spec:
nginx:
nodeSelector:
ananke.bstein.dev/harbor-bootstrap: "true"
kubernetes.io/hostname: titan-11
image:
repository: registry.bstein.dev/infra/harbor-nginx
tag: v2.14.1-arm64 # {"$imagepolicy": "harbor:harbor-nginx:tag"}

View File

@ -8,7 +8,7 @@ spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: 100Gi
storage: 50Gi
storageClassName: astreae
---
apiVersion: v1

View File

@ -8,18 +8,8 @@ resources:
- portal-rbac.yaml
- wger-media-pvc.yaml
- wger-static-pvc.yaml
- wger-admin-ensure-cronjob.yaml
- wger-user-sync-cronjob.yaml
- wger-deployment.yaml
- wger-service.yaml
- wger-ingress.yaml
generatorOptions:
disableNameSuffixHash: true
configMapGenerator:
- name: wger-nginx-config
files:
- default.conf=config/nginx.conf
- nginx.conf=config/nginx-main.conf
- name: wger-user-sync-script
files:
- wger_user_sync.py=scripts/wger_user_sync.py

View File

@ -1,120 +0,0 @@
#!/usr/bin/env python3
from __future__ import annotations
import os
import sys
import django
def _env(name: str, default: str = "") -> str:
value = os.getenv(name, default)
return value.strip() if isinstance(value, str) else ""
def _setup_django() -> None:
    """Initialise Django, defaulting the settings module to ``settings.main``.

    An already-set ``DJANGO_SETTINGS_MODULE`` is left untouched.
    """
    if "DJANGO_SETTINGS_MODULE" not in os.environ:
        os.environ["DJANGO_SETTINGS_MODULE"] = "settings.main"
    django.setup()
def _set_default_gym(user) -> None:
try:
from wger.gym.models import GymConfig
except Exception:
return
try:
config = GymConfig.objects.first()
except Exception:
return
if not config or not getattr(config, "default_gym", None):
return
profile = getattr(user, "userprofile", None)
if not profile or getattr(profile, "gym", None):
return
profile.gym = config.default_gym
profile.save()
def _ensure_profile(user) -> None:
profile = getattr(user, "userprofile", None)
if not profile:
return
if hasattr(profile, "email_verified") and not profile.email_verified:
profile.email_verified = True
if hasattr(profile, "is_temporary") and profile.is_temporary:
profile.is_temporary = False
profile.save()
def _ensure_admin(username: str, password: str, email: str) -> None:
    """Create or update the staff account ``username`` and reset its password.

    A newly created account is activated. The account is always made staff,
    its email is overwritten when ``email`` is non-empty, and its password is
    set to ``password``. The profile flags and default gym are then applied.

    Raises ``RuntimeError`` when ``username`` or ``password`` is empty.
    """
    from django.contrib.auth.models import User

    if not (username and password):
        raise RuntimeError("admin username/password missing")
    account, is_new = User.objects.get_or_create(username=username)
    if is_new:
        account.is_active = True
    if not account.is_staff:
        account.is_staff = True
    if email:
        account.email = email
    account.set_password(password)
    account.save()
    _ensure_profile(account)
    _set_default_gym(account)
    print(f"ensured admin user {username}")
def _ensure_user(username: str, password: str, email: str) -> None:
    """Create or update the regular account ``username`` and reset its password.

    A newly created account is activated. The email is replaced only when
    ``email`` is non-empty and differs from the stored one. The profile flags
    and default gym are then applied, and a "created"/"updated" line is printed.

    Raises ``RuntimeError`` when ``username`` or ``password`` is empty.
    """
    from django.contrib.auth.models import User

    if not (username and password):
        raise RuntimeError("username/password missing")
    account, is_new = User.objects.get_or_create(username=username)
    if is_new:
        account.is_active = True
    if email and account.email != email:
        account.email = email
    account.set_password(password)
    account.save()
    _ensure_profile(account)
    _set_default_gym(account)
    print(f"{'created' if is_new else 'updated'} user {username}")
def main() -> int:
    """Sync the admin and/or regular account described by the environment.

    Reads ``WGER_ADMIN_USERNAME``/``WGER_ADMIN_PASSWORD``/``WGER_ADMIN_EMAIL``
    and ``WGER_USERNAME`` (falling back to ``ONLY_USERNAME``)/``WGER_PASSWORD``/
    ``WGER_EMAIL``. Django is initialised only when at least one complete
    name/password pair is present. Always returns 0.
    """
    admin_user = _env("WGER_ADMIN_USERNAME")
    admin_password = _env("WGER_ADMIN_PASSWORD")
    admin_email = _env("WGER_ADMIN_EMAIL")
    username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
    password = _env("WGER_PASSWORD")
    email = _env("WGER_EMAIL")

    wants_admin = bool(admin_user and admin_password)
    wants_user = bool(username and password)
    if not (wants_admin or wants_user):
        print("no admin or user payload provided; exiting")
        return 0

    _setup_django()
    if wants_admin:
        _ensure_admin(admin_user, admin_password, admin_email)
    if wants_user:
        _ensure_user(username, password, email)
    return 0


if __name__ == "__main__":
    sys.exit(main())

View File

@ -1,120 +0,0 @@
# services/health/wger-admin-ensure-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-admin-ensure
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "15 3 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 1
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-admin" }}
export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)"
export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-username: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.username }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin"
vault.hashicorp.com/agent-inject-template-wger-admin-password: |
{{- with secret "kv/data/atlas/health/wger-admin" -}}
{{ .Data.data.password }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: ensure
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555

View File

@ -1,106 +0,0 @@
# services/health/wger-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
name: wger-user-sync
namespace: health
labels:
atlas.bstein.dev/glue: "true"
spec:
schedule: "0 5 * * *"
suspend: true
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 3
jobTemplate:
spec:
backoffLimit: 0
template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "health"
vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-env: |
{{ with secret "kv/data/atlas/health/wger-db" }}
export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
{{ end }}
{{ with secret "kv/data/atlas/health/wger-secrets" }}
export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
{{ end }}
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
vault.hashicorp.com/agent-inject-template-wger-db-password: |
{{- with secret "kv/data/atlas/health/wger-db" -}}
{{ .Data.data.DJANGO_DB_PASSWORD }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-secret-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SECRET_KEY }}
{{- end -}}
vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
vault.hashicorp.com/agent-inject-template-wger-signing-key: |
{{- with secret "kv/data/atlas/health/wger-secrets" -}}
{{ .Data.data.SIGNING_KEY }}
{{- end -}}
spec:
serviceAccountName: health-vault-sync
restartPolicy: Never
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 70
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
containers:
- name: sync
image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
imagePullPolicy: IfNotPresent
command: ["/bin/sh", "-c"]
args:
- |
set -eu
. /vault/secrets/wger-env
exec python3 /scripts/wger_user_sync.py
env:
- name: SITE_URL
value: https://health.bstein.dev
- name: TIME_ZONE
value: Etc/UTC
- name: TZ
value: Etc/UTC
- name: DJANGO_DEBUG
value: "False"
- name: DJANGO_DB_ENGINE
value: django.db.backends.postgresql
- name: DJANGO_CACHE_BACKEND
value: django.core.cache.backends.locmem.LocMemCache
- name: DJANGO_CACHE_LOCATION
value: wger-cache
volumeMounts:
- name: wger-user-sync-script
mountPath: /scripts
readOnly: true
volumes:
- name: wger-user-sync-script
configMap:
name: wger-user-sync-script
defaultMode: 0555

View File

@ -77,26 +77,23 @@ spec:
mountPath: /config
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: longhorn-host
operator: In
values:
- "true"
- key: node-role.kubernetes.io/worker
operator: In
values:
- "true"
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
- key: kubernetes.io/hostname
operator: In
values:
- rpi5
- titan-22
- weight: 80
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: In
values:
- titan-20
- titan-21
- weight: 60
preference:
matchExpressions:
- key: kubernetes.io/hostname
@ -108,6 +105,7 @@ spec:
fsGroup: 65532
fsGroupChangePolicy: OnRootMismatch
runAsGroup: 65532
runtimeClassName: nvidia
containers:
- name: jellyfin
image: docker.io/jellyfin/jellyfin:10.11.5
@ -120,6 +118,8 @@ spec:
- name: http
containerPort: 8096
env:
- name: NVIDIA_DRIVER_CAPABILITIES
value: "compute,video,utility"
- name: JELLYFIN_PublishedServerUrl
value: "https://stream.bstein.dev"
- name: PUID
@ -131,7 +131,12 @@ spec:
- name: VAULT_COPY_FILES
value: /vault/secrets/ldap-config.xml:/config/plugins/configurations/LDAP-Auth.xml
resources:
limits:
nvidia.com/gpu.shared: 1
# cpu: "4"
# memory: 8Gi
requests:
nvidia.com/gpu.shared: 1
cpu: "500m"
memory: 1Gi
volumeMounts:

568
services/jellyfin/oidc/Jenkinsfile vendored Normal file
View File

@ -0,0 +1,568 @@
pipeline {
// Build agent: a throwaway Kubernetes pod with a single `dotnet` container.
// The container runs `cat` with a TTY so it stays alive and the stages can
// execute their steps inside it via container('dotnet').
agent {
kubernetes {
yaml """
apiVersion: v1
kind: Pod
spec:
restartPolicy: Never
containers:
- name: dotnet
image: mcr.microsoft.com/dotnet/sdk:9.0
command:
- cat
tty: true
"""
}
}
// Prefix console output with timestamps.
options {
timestamps()
}
// Job parameters; the stages read them as environment variables in their
// shell scripts.
// NOTE(review): the generated .csproj hard-codes 1.0.2.0 and 10.11.5, so
// changing PLUGIN_VERSION / JELLYFIN_VERSION here only affects the artifact
// file name and the pushed tag - confirm that is intended.
parameters {
string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/oidc-plugin', description: 'OCI repository for the plugin artifact')
string(name: 'JELLYFIN_VERSION', defaultValue: '10.11.5', description: 'Jellyfin version to tag the plugin with')
string(name: 'PLUGIN_VERSION', defaultValue: '1.0.2.0', description: 'Plugin version')
}
// ORAS_VERSION selects the oras release downloaded in the push stage; the two
// DOTNET_* variables are set to "1" for every stage of this pipeline.
environment {
ORAS_VERSION = "1.2.0"
DOTNET_CLI_TELEMETRY_OPTOUT = "1"
DOTNET_SKIP_FIRST_TIME_EXPERIENCE = "1"
}
stages {
// Check out the SCM revision configured for this job, inside the `dotnet`
// container.
stage('Checkout') {
steps {
container('dotnet') {
checkout scm
}
}
}
stage('Build plugin') {
steps {
container('dotnet') {
sh '''#!/bin/bash
# Build-stage preamble: install tooling, reset the build tree and fetch the
# upstream plugin sources that the heredocs below patch.
#
# The shebang is required: without one Jenkins runs the script with the
# default sh, and the SDK image is Debian-based, where sh is dash and
# rejects "set -o pipefail".
set -euo pipefail
apt-get update
apt-get install -y --no-install-recommends zip curl ca-certificates git
WORKDIR="$(pwd)/build"
SRC_DIR="${WORKDIR}/src"
DIST_DIR="${WORKDIR}/dist"
ART_DIR="${WORKDIR}/artifact"
# Start from empty directories so stale outputs never leak into the artifact.
rm -rf "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}"
mkdir -p "${SRC_DIR}" "${DIST_DIR}" "${ART_DIR}"
# Only the current tip is built, so a shallow clone is sufficient.
# NOTE(review): the upstream ref is not pinned; consider checking out a
# known commit for reproducible builds.
git clone --depth 1 https://github.com/lolerskatez/JellyfinOIDCPlugin.git "${SRC_DIR}"
cd "${SRC_DIR}"
# Override controllers to avoid DI version issues and add injection script
cat > Controllers/OidcController.cs <<'EOF'
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using IdentityModel.OidcClient;
using MediaBrowser.Controller.Library;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Extensions.DependencyInjection;
namespace JellyfinOIDCPlugin.Controllers;
#nullable enable
[ApiController]
[Route("api/oidc")]
public class OidcController : ControllerBase
{
private IUserManager UserManager => HttpContext.RequestServices.GetRequiredService<IUserManager>();
private static readonly Dictionary<string, object> StateManager = new(); // Store AuthorizeState objects
/// <summary>
/// Begins the OIDC login: prepares an authorization request with the configured
/// provider, remembers the resulting authorize state under its state value so
/// the callback can find it, and redirects the browser to the provider.
/// </summary>
/// <returns>
/// A redirect to the provider start URL on success; 400 Bad Request when the
/// plugin is not initialized, no start URL could be obtained, or preparing the
/// login throws.
/// </returns>
[HttpGet("start")]
public async Task<IActionResult> Start()
{
    var config = Plugin.Instance?.Configuration;
    if (config == null)
    {
        return BadRequest("Plugin not initialized");
    }

    var options = new OidcClientOptions
    {
        Authority = config.OidEndpoint?.Trim(),
        ClientId = config.OidClientId?.Trim(),
        ClientSecret = config.OidSecret?.Trim(),
        RedirectUri = GetRedirectUri(),
        Scope = string.Join(" ", config.OidScopes)
    };

    try
    {
        var client = new OidcClient(options);
        var result = await client.PrepareLoginAsync().ConfigureAwait(false);

        // State and StartUrl are read by reflection, as in the original override.
        var resultType = result.GetType();
        var stateString = resultType.GetProperty("State")?.GetValue(result) as string;
        var startUrl = resultType.GetProperty("StartUrl")?.GetValue(result) as string;

        // Validate before storing so a failed start never leaves an orphaned
        // entry behind in the static state table.
        if (string.IsNullOrEmpty(startUrl))
        {
            Console.WriteLine("OIDC: Could not get StartUrl from OIDC result");
            return BadRequest("OIDC initialization failed");
        }

        if (!string.IsNullOrEmpty(stateString))
        {
            // StateManager is a static Dictionary shared by every request and
            // Dictionary is not safe for concurrent writers, so guard it.
            lock (StateManager)
            {
                StateManager[stateString] = result;
            }
        }

        return Redirect(startUrl);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"OIDC start error: {ex}");
        return BadRequest("OIDC error: " + ex.Message);
    }
}
/// <summary>
/// Completes the OIDC login: matches the returned state against the one stored
/// by the start endpoint, processes the provider response, resolves (or
/// creates) the Jellyfin user named after the email / preferred_username / sub
/// claim, marks it as an OIDC user and redirects to the site root.
/// </summary>
/// <returns>
/// A redirect to "/" on success; 400 Bad Request for an unknown state, a failed
/// provider response, a missing identity claim or an unexpected exception;
/// 401 Unauthorized when the user does not exist and auto-creation is disabled.
/// </returns>
[HttpGet("callback")]
public async Task<IActionResult> Callback()
{
    var config = Plugin.Instance?.Configuration;
    if (config == null)
    {
        return BadRequest("Plugin not initialized");
    }

    try
    {
        var stateParam = Request.Query["state"].ToString();

        // A state value is single-use: take it out of the table as soon as it is
        // presented so it cannot be replayed and failed logins do not accumulate
        // entries. The lock guards the shared static Dictionary.
        object? storedState = null;
        if (!string.IsNullOrEmpty(stateParam))
        {
            lock (StateManager)
            {
                StateManager.Remove(stateParam, out storedState);
            }
        }

        if (storedState is not AuthorizeState authorizeState)
        {
            Console.WriteLine($"OIDC: Invalid state {stateParam}");
            return BadRequest("Invalid state");
        }

        var options = new OidcClientOptions
        {
            Authority = config.OidEndpoint?.Trim(),
            ClientId = config.OidClientId?.Trim(),
            ClientSecret = config.OidSecret?.Trim(),
            RedirectUri = GetRedirectUri(),
            Scope = string.Join(" ", config.OidScopes)
        };
        var client = new OidcClient(options);

        var result = await client.ProcessResponseAsync(Request.QueryString.Value, authorizeState).ConfigureAwait(false);
        if (result.IsError)
        {
            Console.WriteLine($"OIDC callback failed: {result.Error} - {result.ErrorDescription}");
            return BadRequest("OIDC authentication failed");
        }

        // Identity claim, in order of preference.
        var email = result.User?.FindFirst("email")?.Value ??
            result.User?.FindFirst("preferred_username")?.Value ??
            result.User?.FindFirst("sub")?.Value;
        if (string.IsNullOrEmpty(email))
        {
            Console.WriteLine("OIDC: No email/username found in OIDC response");
            return BadRequest("No email/username found in OIDC response");
        }

        // Get or create user, honoring AutoCreateUser exactly like the token
        // exchange endpoint does.
        var user = UserManager.GetUserByName(email);
        if (user == null)
        {
            if (!config.AutoCreateUser)
            {
                Console.WriteLine($"OIDC: User {email} not found and auto-create disabled");
                return Unauthorized("User does not exist and auto-creation is disabled");
            }

            Console.WriteLine($"OIDC: Creating new user {email}");
            user = await UserManager.CreateUserAsync(email).ConfigureAwait(false);
        }

        user.AuthenticationProviderId = "OIDC";

        // Fall back to "groups" when no role claim is configured; FindFirst
        // throws on a null claim type.
        var rolesClaimName = config.RoleClaim ?? "groups";
        var rolesClaimValue = result.User?.FindFirst(rolesClaimName)?.Value;
        var roles = string.IsNullOrEmpty(rolesClaimValue)
            ? Array.Empty<string>()
            : rolesClaimValue.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // NOTE(review): the flags below are only logged, never applied to the
        // user, and "Power User" cannot match because roles are split on spaces.
        var isAdmin = roles.Any(r => r.Equals("admin", StringComparison.OrdinalIgnoreCase));
        var isPowerUser = roles.Any(r => r.Equals("Power User", StringComparison.OrdinalIgnoreCase)) && !isAdmin;
        Console.WriteLine($"OIDC: User {email} authenticated. Admin: {isAdmin}, PowerUser: {isPowerUser}");

        await UserManager.UpdateUserAsync(user).ConfigureAwait(false);

        // Redirect to Jellyfin main page
        return Redirect("/");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"OIDC callback error: {ex}");
        return BadRequest("OIDC error: " + ex.Message);
    }
}
/// <summary>
/// Exchanges a provider access token for a Jellyfin user: the token is checked
/// by calling the provider user-info endpoint, the user named after the
/// email / preferred_username / sub claim is looked up (or created when
/// auto-creation is enabled), marked as an OIDC user and described in the
/// response body.
/// </summary>
/// <param name="request">Request body carrying the access token.</param>
/// <returns>
/// 200 with a <see cref="TokenExchangeResponse"/> on success; 400 when the plugin
/// is not initialized, the token is missing or no identity claim is present;
/// 401 when the token is rejected or the user is unknown and auto-creation is
/// disabled; 500 on any other exception.
/// </returns>
[HttpPost("token")]
public async Task<IActionResult> ExchangeToken([FromBody] TokenExchangeRequest request)
{
    var pluginConfig = Plugin.Instance?.Configuration;
    if (pluginConfig == null)
    {
        Console.WriteLine("OIDC: Plugin not initialized");
        return BadRequest("Plugin not initialized");
    }

    var accessToken = request?.AccessToken;
    if (string.IsNullOrEmpty(accessToken))
    {
        Console.WriteLine("OIDC: No access token provided");
        return BadRequest("Access token is required");
    }

    try
    {
        Console.WriteLine("OIDC: Processing token exchange request");

        // The user-info endpoint is what validates the token.
        var oidcClient = new OidcClient(new OidcClientOptions
        {
            Authority = pluginConfig.OidEndpoint?.Trim(),
            ClientId = pluginConfig.OidClientId?.Trim(),
            ClientSecret = pluginConfig.OidSecret?.Trim(),
            Scope = string.Join(" ", pluginConfig.OidScopes)
        });
        var userInfo = await oidcClient.GetUserInfoAsync(accessToken).ConfigureAwait(false);
        if (userInfo.IsError)
        {
            Console.WriteLine($"OIDC: Failed to get user info: {userInfo.Error}");
            return Unauthorized("Invalid access token");
        }

        // First value of the given claim type, or null when absent.
        string? ClaimValue(string claimType) =>
            userInfo.Claims.FirstOrDefault(claim => claim.Type == claimType)?.Value;

        var email = ClaimValue("email") ?? ClaimValue("preferred_username") ?? ClaimValue("sub");
        if (string.IsNullOrEmpty(email))
        {
            Console.WriteLine("OIDC: No email/username found in token");
            return BadRequest("No email/username found in token");
        }

        var account = UserManager.GetUserByName(email);
        if (account == null)
        {
            if (!pluginConfig.AutoCreateUser)
            {
                Console.WriteLine($"OIDC: User {email} not found and auto-create disabled");
                return Unauthorized("User does not exist and auto-creation is disabled");
            }

            Console.WriteLine($"OIDC: Creating new user from token {email}");
            account = await UserManager.CreateUserAsync(email).ConfigureAwait(false);
        }

        account.AuthenticationProviderId = "OIDC";

        // Roles arrive as one comma/space separated claim value; the claim name
        // defaults to "groups" when none is configured.
        string[] roleNames = Array.Empty<string>();
        var rawRoles = ClaimValue(pluginConfig.RoleClaim ?? "groups");
        if (!string.IsNullOrEmpty(rawRoles))
        {
            roleNames = rawRoles.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries);
        }

        var isAdmin = roleNames.Contains("admin", StringComparer.OrdinalIgnoreCase);
        var isPowerUser = !isAdmin && roleNames.Contains("Power User", StringComparer.OrdinalIgnoreCase);
        Console.WriteLine($"OIDC: Token exchange for {email} Admin:{isAdmin} Power:{isPowerUser}");

        await UserManager.UpdateUserAsync(account).ConfigureAwait(false);

        var response = new TokenExchangeResponse
        {
            Success = true,
            UserId = account.Id.ToString(),
            Username = account.Username,
            Email = email,
            IsAdmin = isAdmin,
            Message = "User authenticated successfully"
        };
        return Ok(response);
    }
    catch (Exception ex)
    {
        Console.WriteLine($"OIDC token exchange error: {ex}");
        return StatusCode(500, $"Token exchange failed: {ex.Message}");
    }
}
/// <summary>
/// Returns the redirect URI to register with the provider: the configured
/// value when one is set, otherwise the callback endpoint on the scheme and
/// host of the current request.
/// </summary>
private string GetRedirectUri()
{
    var fromConfig = Plugin.Instance?.Configuration?.RedirectUri;
    return string.IsNullOrWhiteSpace(fromConfig)
        ? $"{Request.Scheme}://{Request.Host}/api/oidc/callback"
        : fromConfig;
}
}
/// <summary>Request body accepted by the token exchange endpoint.</summary>
public class TokenExchangeRequest
{
/// <summary>Access token to validate; the endpoint rejects the request when it is null or empty.</summary>
public string? AccessToken { get; set; }
/// <summary>Optional ID token; accepted in the body but not read by the token exchange endpoint.</summary>
public string? IdToken { get; set; }
}
/// <summary>Response body returned by the token exchange endpoint on success.</summary>
public class TokenExchangeResponse
{
/// <summary>Set to true by the endpoint when the exchange succeeded.</summary>
public bool Success { get; set; }
/// <summary>String form of the Jellyfin user id.</summary>
public string? UserId { get; set; }
/// <summary>Jellyfin user name of the resolved user.</summary>
public string? Username { get; set; }
/// <summary>Identity claim value (email, preferred_username or sub) used to find the user.</summary>
public string? Email { get; set; }
/// <summary>True when the role claim contained "admin" (case-insensitive).</summary>
public bool IsAdmin { get; set; }
/// <summary>Human-readable status message.</summary>
public string? Message { get; set; }
}
EOF
cat > Controllers/OidcStaticController.cs <<'EOF'
using System;
using System.IO;
using System.Reflection;
using MediaBrowser.Common.Plugins;
using Microsoft.AspNetCore.Mvc;
namespace JellyfinOIDCPlugin.Controllers;
/// <summary>
/// Serves the web assets embedded in the plugin assembly (login, loader and
/// global-injector scripts plus the configuration page) and a small bootstrap
/// snippet that loads the loader script, all under <c>api/oidc</c>.
/// </summary>
[ApiController]
[Route("api/oidc")]
public class OidcStaticController : ControllerBase
{
    /// <summary>Manifest-resource name prefix shared by every embedded web asset.</summary>
    private const string ResourcePrefix = "JellyfinOIDCPlugin.web.";

    /// <summary>Content type used for every script response.</summary>
    private const string JavaScriptContentType = "application/javascript";

    /// <summary>
    /// Bootstrap snippet returned by the inject endpoint; it appends a script
    /// element pointing at the loader endpoint to the page. The text is kept
    /// flush-left because a verbatim string preserves leading whitespace.
    /// </summary>
    private const string InjectScript = @"
(function() {
console.log('[OIDC Plugin] Bootstrap inject started');
// Load oidc-loader.js dynamically
const loaderScript = document.createElement('script');
loaderScript.src = '/api/oidc/loader.js';
loaderScript.type = 'application/javascript';
loaderScript.onerror = function() {
console.error('[OIDC Plugin] Failed to load loader.js');
};
loaderScript.onload = function() {
console.log('[OIDC Plugin] Loader.js loaded successfully');
};
// Append to head or body
const target = document.head || document.documentElement;
target.appendChild(loaderScript);
console.log('[OIDC Plugin] Bootstrap script appended to page');
})();
";

    /// <summary>Returns the embedded login script.</summary>
    [HttpGet("login.js")]
    public IActionResult GetLoginScript() =>
        ServeEmbedded("oidc-login.js", JavaScriptContentType, "login script");

    /// <summary>Returns the embedded loader script.</summary>
    [HttpGet("loader.js")]
    public IActionResult GetLoader() =>
        ServeEmbedded("oidc-loader.js", JavaScriptContentType, "loader script");

    /// <summary>Returns the constant bootstrap snippet.</summary>
    [HttpGet("inject")]
    public IActionResult GetInject() =>
        // Building a response from a constant cannot fail, so the previous
        // try/catch around it was unreachable and has been dropped.
        Content(InjectScript, JavaScriptContentType);

    /// <summary>Returns the embedded global injector script.</summary>
    [HttpGet("global.js")]
    public IActionResult GetGlobalInjector() =>
        ServeEmbedded("oidc-global-injector.js", JavaScriptContentType, "global injector");

    /// <summary>
    /// Returns the embedded configuration page. Unlike the script endpoints,
    /// its 404 and 500 responses carry a descriptive message.
    /// </summary>
    [HttpGet("config")]
    public IActionResult GetConfigurationPage() =>
        ServeEmbedded("configurationpage.html", "text/html", "configuration page", exposeDetails: true);

    /// <summary>
    /// Reads one embedded resource and returns it with the given content type.
    /// </summary>
    /// <param name="fileName">Resource file name below the web folder.</param>
    /// <param name="contentType">Content type of the response.</param>
    /// <param name="label">Lower-case description used in log and error messages.</param>
    /// <param name="exposeDetails">
    /// When true the 404 body names the missing resource and the 500 body
    /// includes the exception message; otherwise both stay generic.
    /// </param>
    /// <returns>200 with the resource text, 404 when it is missing, 500 on a read error.</returns>
    private IActionResult ServeEmbedded(string fileName, string contentType, string label, bool exposeDetails = false)
    {
        try
        {
            var assembly = typeof(OidcStaticController).Assembly;
            using var stream = assembly.GetManifestResourceStream(ResourcePrefix + fileName);
            if (stream == null)
            {
                // Messages start with a capital letter, e.g. "Login script resource not found".
                var missing = char.ToUpperInvariant(label[0]) + label.Substring(1) + " resource not found";
                Console.WriteLine("OIDC: " + missing);
                return exposeDetails ? NotFound(missing) : NotFound();
            }

            using var reader = new StreamReader(stream);
            return Content(reader.ReadToEnd(), contentType);
        }
        catch (Exception ex)
        {
            Console.WriteLine($"OIDC: Error serving {label} {ex}");
            return exposeDetails
                ? StatusCode(500, $"Error loading {label}: {ex.Message}")
                : StatusCode(500, $"Error loading {label}");
        }
    }
}
EOF
cat > JellyfinOIDCPlugin.csproj <<'EOF'
<!-- Replacement project file for the upstream plugin sources: targets net9.0
     and references the Jellyfin 10.11.5 packages. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net9.0</TargetFramework>
<!-- The assembly is named with a .v2 suffix while the root namespace stays
     JellyfinOIDCPlugin; the controllers load embedded resources by names that
     start with JellyfinOIDCPlugin.web. -->
<AssemblyName>JellyfinOIDCPlugin.v2</AssemblyName>
<RootNamespace>JellyfinOIDCPlugin</RootNamespace>
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
<ImplicitUsings>enable</ImplicitUsings>
<!-- NOTE(review): versions are literal here; this file is written through a
     quoted heredoc, so the PLUGIN_VERSION job parameter is not substituted. -->
<AssemblyVersion>1.0.2.0</AssemblyVersion>
<FileVersion>1.0.2.0</FileVersion>
<CopyLocalLockFileAssemblies>false</CopyLocalLockFileAssemblies>
</PropertyGroup>
<ItemGroup>
<!-- Packages marked ExcludeAssets=runtime are used for compilation only;
     presumably the Jellyfin server supplies them at run time (confirm). -->
<PackageReference Include="Jellyfin.Controller" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Model" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Common" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Data" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<PackageReference Include="Jellyfin.Database.Implementations" Version="10.11.5">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
<!-- The OIDC client library is the one reference without a runtime exclusion. -->
<PackageReference Include="IdentityModel.OidcClient" Version="5.2.1">
<PrivateAssets>none</PrivateAssets>
</PackageReference>
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="9.0.11">
<ExcludeAssets>runtime</ExcludeAssets>
</PackageReference>
</ItemGroup>
<ItemGroup>
<!-- Embed every html, js and css file from the web folder. The doubled
     backslash is an escape for the surrounding Groovy string and presumably
     reaches this file as a single path separator (confirm). -->
<EmbeddedResource Include="web\\*.html" />
<EmbeddedResource Include="web\\*.js" />
<EmbeddedResource Include="web\\*.css" />
</ItemGroup>
</Project>
EOF
# Restore packages, publish a framework-dependent Release build into DIST_DIR,
# then zip the publish output into the artifact directory. The archive name
# carries PLUGIN_VERSION and is the file the push stage uploads.
dotnet restore
dotnet publish -c Release --no-self-contained -o "${DIST_DIR}"
cd "${DIST_DIR}"
zip -r "${ART_DIR}/OIDC_Authentication_${PLUGIN_VERSION}-net9.zip" .
'''
}
}
}
// Uploads the plugin zip produced by the build stage to the OCI repository in
// HARBOR_REPO, tagged with both JELLYFIN_VERSION and `latest`, using the
// `harbor-robot` credentials.
stage('Push to Harbor') {
    steps {
        container('dotnet') {
            withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
                // The shebang is required: without one Jenkins runs the script
                // with the default sh (dash in this image), which rejects
                // "set -o pipefail".
                sh '''#!/bin/bash
                    set -euo pipefail

                    ART_DIR="$(pwd)/build/artifact"
                    ARTIFACT_NAME="OIDC_Authentication_${PLUGIN_VERSION}-net9.zip"
                    ORAS_BIN="/usr/local/bin/oras"

                    if [ ! -f "${ART_DIR}/${ARTIFACT_NAME}" ]; then
                        echo "artifact ${ART_DIR}/${ARTIFACT_NAME} not found; did the build stage run?" >&2
                        exit 1
                    fi

                    # -f makes curl fail on HTTP errors instead of piping an error page into tar.
                    curl -fsSL "https://github.com/oras-project/oras/releases/download/v${ORAS_VERSION}/oras_${ORAS_VERSION}_linux_amd64.tar.gz" | tar -xz -C /usr/local/bin oras

                    # Registry host is everything before the first slash of the repository.
                    ref_host="${HARBOR_REPO%%/*}"

                    # Feed the password on stdin so it never appears in the process list.
                    printf '%s' "${HARBOR_PASSWORD}" | "${ORAS_BIN}" login "${ref_host}" -u "${HARBOR_USERNAME}" --password-stdin

                    # oras refuses absolute file paths unless path validation is
                    # disabled, so push by relative name from the artifact
                    # directory. One push with both tags keeps them on the same
                    # manifest digest.
                    cd "${ART_DIR}"
                    "${ORAS_BIN}" push "${HARBOR_REPO}:${JELLYFIN_VERSION},latest" "${ARTIFACT_NAME}:application/zip" --artifact-type application/zip
                '''
            }
        }
    }
}
}
// Runs after every build regardless of result: archive any plugin zip found
// under build/artifact. allowEmptyArchive keeps this step from failing when no
// zip was produced.
post {
always {
container('dotnet') {
archiveArtifacts artifacts: 'build/artifact/*.zip', allowEmptyArchive: true
}
}
}
}

View File

@ -45,17 +45,6 @@ data:
username: "${HARBOR_ROBOT_USERNAME}"
password: "${HARBOR_ROBOT_PASSWORD}"
description: "Harbor robot for pipelines"
- usernamePassword:
scope: GLOBAL
id: harbor-robot-streaming
username: "${HARBOR_STREAMING_ROBOT_USERNAME}"
password: "${HARBOR_STREAMING_ROBOT_PASSWORD}"
description: "Harbor robot for streaming pushes"
- string:
scope: GLOBAL
id: sonarqube-token
secret: "${SONARQUBE_TOKEN}"
description: "SonarQube token for quality-gate evidence collection"
jobs.yaml: |
jobs:
- script: |
@ -214,32 +203,6 @@ data:
}
}
}
pipelineJob('arcanagon') {
properties {
pipelineTriggers {
triggers {
scmTrigger {
scmpoll_spec('H/5 * * * *')
ignorePostCommitHooks(false)
}
}
}
}
definition {
cpsScm {
scm {
git {
remote {
url('https://scm.bstein.dev/bstein/arcanagon.git')
credentials('gitea-pat')
}
branches('*/master')
}
}
scriptPath('Jenkinsfile')
}
}
}
pipelineJob('pegasus') {
properties {
pipelineTriggers {
@ -266,58 +229,6 @@ data:
}
}
}
pipelineJob('atlasbot') {
properties {
pipelineTriggers {
triggers {
scmTrigger {
scmpoll_spec('H/5 * * * *')
ignorePostCommitHooks(false)
}
}
}
}
definition {
cpsScm {
scm {
git {
remote {
url('https://scm.bstein.dev/bstein/atlasbot.git')
credentials('gitea-pat')
}
branches('*/main')
}
}
scriptPath('Jenkinsfile')
}
}
}
pipelineJob('soteria') {
properties {
pipelineTriggers {
triggers {
scmTrigger {
scmpoll_spec('H/5 * * * *')
ignorePostCommitHooks(false)
}
}
}
}
definition {
cpsScm {
scm {
git {
remote {
url('https://scm.bstein.dev/bstein/soteria.git')
credentials('gitea-pat')
}
branches('*/main')
}
}
scriptPath('Jenkinsfile')
}
}
}
pipelineJob('data-prepper') {
properties {
pipelineTriggers {
@ -337,65 +248,13 @@ data:
url('https://scm.bstein.dev/bstein/titan-iac.git')
credentials('gitea-pat')
}
branches('*/main')
branches('*/feature/sso-hardening')
}
}
scriptPath('services/logging/Jenkinsfile.data-prepper')
}
}
}
pipelineJob('titan-iac') {
properties {
pipelineTriggers {
triggers {
scmTrigger {
scmpoll_spec('H/5 * * * *')
ignorePostCommitHooks(false)
}
}
}
}
definition {
cpsScm {
scm {
git {
remote {
url('https://scm.bstein.dev/bstein/titan-iac.git')
credentials('gitea-pat')
}
branches('*/main')
}
}
scriptPath('Jenkinsfile')
}
}
}
pipelineJob('typhon') {
properties {
pipelineTriggers {
triggers {
scmTrigger {
scmpoll_spec('H/5 * * * *')
ignorePostCommitHooks(false)
}
}
}
}
definition {
cpsScm {
scm {
git {
remote {
url('https://scm.bstein.dev/bstein/typhon.git')
credentials('gitea-pat')
}
branches('*/main')
}
}
scriptPath('Jenkinsfile')
}
}
}
multibranchPipelineJob('titan-iac-quality-gate') {
branchSources {
branchSource {
@ -488,40 +347,6 @@ data:
podRetention: Never
serviceAccount: "jenkins"
slaveConnectTimeoutStr: "100"
yaml: |
spec:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 85
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
jenkins/jenkins-jenkins-agent: "true"
yamlMergeStrategy: override
inheritYamlMergeStrategy: false
slaveAgentPort: 50000

View File

@ -33,35 +33,22 @@ spec:
{{ with secret "kv/data/atlas/jenkins/harbor-robot-creds" }}
HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
{{ end }}
{{ with secret "kv/data/atlas/jenkins/harbor-streaming-robot-creds" }}
HARBOR_STREAMING_ROBOT_USERNAME={{ .Data.data.username }}
HARBOR_STREAMING_ROBOT_PASSWORD={{ .Data.data.password }}
{{ end }}
{{ with secret "kv/data/atlas/shared/harbor-pull" }}
{{- if and .Data.data.username .Data.data.password }}
HARBOR_PULL_USERNAME={{ .Data.data.username }}
HARBOR_PULL_PASSWORD={{ .Data.data.password }}
HARBOR_ROBOT_USERNAME={{ .Data.data.username }}
HARBOR_ROBOT_PASSWORD={{ .Data.data.password }}
{{- end }}
{{ end }}
{{ with secret "kv/data/atlas/jenkins/gitea-pat" }}
GITEA_PAT_USERNAME={{ .Data.data.username }}
GITEA_PAT_TOKEN={{ .Data.data.token }}
{{ end }}
{{ with secret "kv/data/atlas/quality/sonarqube-oidc" }}
SONARQUBE_TOKEN={{ .Data.data.sonarqube_exporter_token }}
{{ end }}
{{ with secret "kv/data/atlas/jenkins/webhook-tokens" }}
TITAN_IAC_WEBHOOK_TOKEN={{ .Data.data.titan_iac_quality_gate }}
GIT_NOTIFY_TOKEN_BSTEIN_DEV_HOME={{ .Data.data.git_notify_bstein_dev_home }}
{{ end }}
{{ with secret "kv/data/atlas/jenkins/ariadne-api" }}
ARIADNE_JENKINS_API_USER={{ .Data.data.username }}
ARIADNE_JENKINS_API_TOKEN={{ .Data.data.token }}
{{ end }}
bstein.dev/restarted-at: "2026-04-13T06:35:00Z"
bstein.dev/restarted-at: "2026-02-02T15:10:33Z"
spec:
serviceAccountName: jenkins
nodeSelector:
@ -70,21 +57,6 @@ spec:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
@ -103,7 +75,6 @@ spec:
- sso.bstein.dev
securityContext:
fsGroup: 1000
fsGroupChangePolicy: OnRootMismatch
initContainers:
- name: install-plugins
image: jenkins/jenkins:2.528.3-jdk21
@ -180,8 +151,7 @@ spec:
port: http
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 60
failureThreshold: 20
volumeMounts:
- name: jenkins-home
mountPath: /var/jenkins_home

View File

@ -22,7 +22,6 @@ configMapGenerator:
- name: jenkins-init-scripts
namespace: jenkins
files:
- ariadne-api-user.groovy=scripts/ariadne-api-user.groovy
- git-notify-token.groovy=scripts/git-notify-token.groovy
- theme.groovy=scripts/theme.groovy
options:

View File

@ -1,96 +0,0 @@
import hudson.model.User
import jenkins.security.ApiTokenProperty

// Ensures the Jenkins user named by ARIADNE_JENKINS_API_USER exists and owns an
// API token called "ariadne-weather", then records the user id and the token
// under /var/jenkins_home/secrets.
//
// Token source, in order of preference: the ARIADNE_JENKINS_API_TOKEN
// environment variable, then the previously persisted token file. When a fixed
// token cannot be installed, a new token is generated and persisted instead.

def userId = (System.getenv("ARIADNE_JENKINS_API_USER") ?: "").trim()
def envTokenValue = (System.getenv("ARIADNE_JENKINS_API_TOKEN") ?: "").trim()
def tokenName = "ariadne-weather"
def tokenFile = new File("/var/jenkins_home/secrets/ariadne-api-token")
def userFile = new File("/var/jenkins_home/secrets/ariadne-api-user")
def persistedTokenValue = tokenFile.exists() ? (tokenFile.text ?: "").trim() : ""
def tokenValue = envTokenValue ?: persistedTokenValue

// Writes `value` plus a trailing newline to `target`, readable and writable by
// the owner only. Permissions are tightened BEFORE the content is written so
// the secret is never on disk with broader access, not even briefly.
def writeOwnerOnly = { File target, String value ->
    target.parentFile?.mkdirs()
    target.createNewFile()
    target.setReadable(false, false)
    target.setReadable(true, true)
    target.setWritable(false, false)
    target.setWritable(true, true)
    target.text = value + "\n"
}

if (!userId || !tokenValue) {
    // Either condition alone is enough to skip, hence "or".
    println("Ariadne API user bootstrap skipped: missing ARIADNE_JENKINS_API_USER or no token source available")
    return
}

def user = User.getById(userId, true)
if (user == null) {
    println("Ariadne API user bootstrap failed: unable to resolve user ${userId}")
    return
}

// Give the account a display name unless one was already set.
if (!user.getFullName() || user.getFullName().trim() == userId) {
    user.setFullName("Ariadne Metrics")
}

def prop = user.getProperty(ApiTokenProperty.class)
if (prop == null) {
    prop = new ApiTokenProperty()
    user.addProperty(prop)
}

// Prefer the persisted token when Jenkins still accepts it, so a working token
// is not rotated needlessly.
if (persistedTokenValue && prop.matchesPassword(persistedTokenValue)) {
    tokenValue = persistedTokenValue
}

if (!prop.matchesPassword(tokenValue)) {
    def store = prop.getTokenStore()
    boolean configured = false
    try {
        // Replace any existing token of the same name with the fixed value.
        def existing = store.getTokenListSortedByName().find { token ->
            try {
                token.getName() == tokenName
            } catch (Throwable ignored) {
                false
            }
        }
        if (existing != null) {
            try {
                store.revokeToken(existing.getUuid())
            } catch (Throwable revokeError) {
                println("Ariadne API user bootstrap warning: failed to revoke existing token ${tokenName}")
            }
        }
        store.addFixedNewToken(tokenName, tokenValue)
        configured = true
    } catch (Throwable installError) {
        // Fallback for older token-store variants; report the failure type
        // (not the message) so it is visible without echoing token material.
        println("Ariadne API user bootstrap warning: fixed token install failed (${installError.getClass().getSimpleName()})")
    }
    if (!configured) {
        if (persistedTokenValue && prop.matchesPassword(persistedTokenValue)) {
            tokenValue = persistedTokenValue
        } else {
            def generated = store.generateNewToken(tokenName)
            if (generated?.plainValue) {
                tokenValue = generated.plainValue
            }
            println("Ariadne API user bootstrap warning: addFixedNewToken unavailable, generated replacement token")
        }
    }
}

writeOwnerOnly(tokenFile, tokenValue)
writeOwnerOnly(userFile, userId)

user.save()
println("Ariadne API user bootstrap complete for ${userId}")

View File

@ -35,38 +35,7 @@ subjects:
- kind: ServiceAccount
name: jenkins
namespace: jenkins
- kind: ServiceAccount
name: default
namespace: jenkins
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: jenkins-agent
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: jenkins-glue-observer
rules:
- apiGroups: ["batch"]
resources:
- cronjobs
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: jenkins-glue-observer
subjects:
- kind: ServiceAccount
name: jenkins
namespace: jenkins
- kind: ServiceAccount
name: default
namespace: jenkins
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: jenkins-glue-observer

View File

@ -18,15 +18,6 @@ spec:
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-13", "titan-15", "titan-17", "titan-19"]
containers:
- name: sync
image: alpine:3.20

View File

@ -24,9 +24,7 @@ resources:
- oneoffs/logs-oidc-secret-ensure-job.yaml
- oneoffs/metis-oidc-secret-ensure-job.yaml
- oneoffs/soteria-oidc-secret-ensure-job.yaml
- oneoffs/quality-oidc-secret-ensure-job.yaml
- oneoffs/metis-ssh-keys-secret-ensure-job.yaml
- oneoffs/metis-node-passwords-secret-ensure-job.yaml
- oneoffs/harbor-oidc-secret-ensure-job.yaml
- oneoffs/vault-oidc-secret-ensure-job.yaml
- oneoffs/actual-oidc-secret-ensure-job.yaml

View File

@ -1,110 +0,0 @@
# services/keycloak/oneoffs/metis-node-passwords-secret-ensure-job.yaml
# One-off job for sso/metis-node-passwords-secret-ensure-4.
# Purpose: ensure per-node Metis recovery placeholders exist in Vault.
# Every field already stored for a node is preserved; atlas_password and
# root_password default to empty strings, and intranet_ip is standardized
# per node.
# NOTE(review): a Job's pod template is immutable; if this Job still exists in
# the cluster, bump the name suffix when rolling out a script change.
apiVersion: batch/v1
kind: Job
metadata:
  name: metis-node-passwords-secret-ensure-4
  namespace: sso
spec:
  backoffLimit: 0
  ttlSecondsAfterFinished: 3600
  template:
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: registry.bstein.dev/bstein/kubectl:1.35.0
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -eu
              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-sso-secrets}"

              # Log in to Vault with the pod's service-account token.
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}" "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi

              ensured=0
              while read -r node intranet_ip; do
                if [ -z "${node}" ] || [ -z "${intranet_ip}" ]; then
                  continue
                fi
                secret_path="kv/data/atlas/nodes/${node}"

                # 200 -> start from the stored fields, 404 -> start empty,
                # anything else -> abort.
                read_status="$(curl -sS -o /tmp/node-read.json -w "%{http_code}" -H "X-Vault-Token: ${vault_token}" "${vault_addr}/v1/${secret_path}" || true)"
                if [ "${read_status}" = "200" ]; then
                  existing="$(jq -c '.data.data // {}' /tmp/node-read.json)"
                elif [ "${read_status}" = "404" ]; then
                  existing='{}'
                else
                  echo "Vault read failed for ${node} (status ${read_status})" >&2
                  cat /tmp/node-read.json >&2 || true
                  exit 1
                fi

                # The write replaces the whole secret, so merge onto the
                # existing fields instead of sending only the three managed
                # keys; otherwise any other stored field would be dropped.
                payload="$(jq -nc --argjson existing "${existing}" --arg intranet_ip "${intranet_ip}" '{data: ($existing + {atlas_password: ($existing.atlas_password // ""), root_password: ($existing.root_password // ""), intranet_ip: $intranet_ip})}')"
                write_status="$(curl -sS -o /tmp/node-write.json -w "%{http_code}" -X POST -H "X-Vault-Token: ${vault_token}" -H 'Content-Type: application/json' -d "${payload}" "${vault_addr}/v1/${secret_path}")"
                if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
                  echo "Vault write failed for ${node} (status ${write_status})" >&2
                  cat /tmp/node-write.json >&2 || true
                  exit 1
                fi
                ensured=$((ensured + 1))
                echo "Ensured node secret placeholder for ${node} (${intranet_ip})"
              done <<'EOF_NODES'
              titan-jh 192.168.22.8
              titan-db 192.168.22.10
              titan-0a 192.168.22.11
              titan-0b 192.168.22.12
              titan-0c 192.168.22.13
              titan-20 192.168.22.20
              titan-21 192.168.22.21
              titan-22 192.168.22.22
              titan-23 192.168.22.23
              titan-24 192.168.22.26
              titan-04 192.168.22.30
              titan-05 192.168.22.31
              titan-06 192.168.22.32
              titan-07 192.168.22.33
              titan-08 192.168.22.34
              titan-09 192.168.22.35
              titan-10 192.168.22.36
              titan-11 192.168.22.37
              titan-12 192.168.22.40
              titan-13 192.168.22.41
              titan-14 192.168.22.42
              titan-15 192.168.22.43
              titan-16 192.168.22.44
              titan-17 192.168.22.45
              titan-18 192.168.22.46
              titan-19 192.168.22.47
              EOF_NODES
              echo "Ensured ${ensured} Metis node placeholders in Vault"

View File

@ -73,7 +73,7 @@ spec:
CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
create_payload='{"clientId":"metis","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H 'Content-Type: application/json' \
@ -121,7 +121,7 @@ spec:
fi
fi
update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://recovery.bstein.dev/oauth2/callback"],"webOrigins":["https://recovery.bstein.dev"],"rootUrl":"https://recovery.bstein.dev","baseUrl":"/"}'
update_payload='{"enabled":true,"clientId":"metis","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://sentinel.bstein.dev/oauth2/callback"],"webOrigins":["https://sentinel.bstein.dev"],"rootUrl":"https://sentinel.bstein.dev","baseUrl":"/"}'
status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
-H "Authorization: Bearer ${ACCESS_TOKEN}" \
-H 'Content-Type: application/json' \

View File

@ -1,198 +0,0 @@
# services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml
# One-off job for sso/quality-oidc-secret-ensure-1.
# Purpose: ensure the SonarQube oauth2-proxy OIDC client and Vault secret exist.
# Keep this completed Job around; bump the suffix if it ever needs to be rerun.
#
# Flow of the embedded script:
#   1. Fetch a Keycloak admin token (master realm, admin-cli, up to 5 attempts).
#   2. Create the `sonarqube` client in the `atlas` realm when it is missing,
#      attach the `groups` client scope, then PUT the desired client settings.
#   3. Read the client secret, log in to Vault with the pod's service-account
#      JWT, and write client_id / client_secret / cookie_secret to
#      kv/data/atlas/quality/sonarqube-oidc, then read it back to verify.
apiVersion: batch/v1
kind: Job
metadata:
  name: quality-oidc-secret-ensure-1
  namespace: sso
spec:
  # No retries: a failed run must be inspected, not silently repeated.
  backoffLimit: 0
  template:
    metadata:
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "sso-secrets"
        vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
        # Rendered to /vault/secrets/keycloak-admin-env.sh and sourced by the script.
        vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
          {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
          export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
          {{ end }}
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail
              . /vault/secrets/keycloak-admin-env.sh
              KC_URL="http://keycloak.sso.svc.cluster.local"

              # --- 1. Keycloak admin token (retry with a growing delay) ---
              ACCESS_TOKEN=""
              for attempt in 1 2 3 4 5; do
                # Credentials are sent with --data-urlencode so that characters
                # such as &, +, % or = in the username/password survive the
                # application/x-www-form-urlencoded body unchanged.
                TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \
                  -H 'Content-Type: application/x-www-form-urlencoded' \
                  -d "grant_type=password" \
                  -d "client_id=admin-cli" \
                  --data-urlencode "username=${KEYCLOAK_ADMIN}" \
                  --data-urlencode "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)"
                ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)"
                if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then
                  break
                fi
                echo "Keycloak token request failed (attempt ${attempt})" >&2
                sleep $((attempt * 2))
              done
              if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
                echo "Failed to fetch Keycloak admin token" >&2
                exit 1
              fi

              # --- 2a. Look up the client; create it when absent ---
              CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
              CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                create_payload='{"clientId":"sonarqube","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  -H 'Content-Type: application/json' \
                  -d "${create_payload}" \
                  "$KC_URL/admin/realms/atlas/clients")"
                # 409 is tolerated: the follow-up lookup decides whether the client exists.
                if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then
                  echo "Keycloak client create failed (status ${status})" >&2
                  exit 1
                fi
                CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients?clientId=sonarqube" || true)"
                CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              fi
              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                echo "Keycloak client sonarqube not found" >&2
                exit 1
              fi

              # --- 2b. Make sure the `groups` client scope is attached ---
              SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)"
              if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
                echo "Keycloak client scope groups not found" >&2
                exit 1
              fi
              DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)"
              OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)"
              # Attach as an optional scope only when it is in neither list.
              if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \
                && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                  # PUT was rejected; retry the same endpoint with POST before giving up.
                  status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                    -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                    "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                  if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                    echo "Failed to attach groups client scope to sonarqube (status ${status})" >&2
                    exit 1
                  fi
                fi
              fi

              # --- 2c. Converge the client settings ---
              update_payload='{"enabled":true,"clientId":"sonarqube","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://quality.bstein.dev/oauth2/callback"],"webOrigins":["https://quality.bstein.dev"],"rootUrl":"https://quality.bstein.dev","baseUrl":"/"}'
              status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                -H 'Content-Type: application/json' \
                -d "${update_payload}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")"
              if [ "$status" != "204" ]; then
                echo "Keycloak client update failed (status ${status})" >&2
                exit 1
              fi

              # --- 3a. Read the client secret from Keycloak ---
              CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)"
              if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then
                echo "Keycloak client secret not found" >&2
                exit 1
              fi

              # --- 3b. Vault login via the Kubernetes auth method ---
              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-sso-secrets}"
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}" \
                "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi

              # --- 3c. Reuse the stored cookie secret when it has a usable length ---
              read_status="$(curl -sS -o /tmp/sonarqube-oidc-read.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
              COOKIE_SECRET=""
              if [ "${read_status}" = "200" ]; then
                COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/sonarqube-oidc-read.json)"
              elif [ "${read_status}" != "404" ]; then
                echo "Vault read failed (status ${read_status})" >&2
                cat /tmp/sonarqube-oidc-read.json >&2 || true
                exit 1
              fi
              if [ -n "${COOKIE_SECRET}" ]; then
                # Only 16-, 24- or 32-byte values are kept (presumably the sizes
                # oauth2-proxy accepts - confirm); anything else is regenerated.
                length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')"
                if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then
                  COOKIE_SECRET=""
                fi
              fi
              if [ -z "${COOKIE_SECRET}" ]; then
                # 16 random bytes rendered as 32 hex characters.
                COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')"
              fi

              # --- 3d. Write the secret and verify it can be read back ---
              payload="$(jq -nc \
                --arg client_id "sonarqube" \
                --arg client_secret "${CLIENT_SECRET}" \
                --arg cookie_secret "${COOKIE_SECRET}" \
                '{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')"
              write_status="$(curl -sS -o /tmp/sonarqube-oidc-write.json -w "%{http_code}" -X POST \
                -H "X-Vault-Token: ${vault_token}" \
                -H 'Content-Type: application/json' \
                -d "${payload}" "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc")"
              if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
                echo "Vault write failed (status ${write_status})" >&2
                cat /tmp/sonarqube-oidc-write.json >&2 || true
                exit 1
              fi
              verify_status="$(curl -sS -o /tmp/sonarqube-oidc-verify.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/quality/sonarqube-oidc" || true)"
              if [ "${verify_status}" != "200" ]; then
                echo "Vault verify failed (status ${verify_status})" >&2
                cat /tmp/sonarqube-oidc-verify.json >&2 || true
                exit 1
              fi
              echo "SonarQube OIDC secret ready in Vault"

View File

@ -8,6 +8,7 @@ spec:
restartPolicy: Never
serviceAccountName: jenkins
nodeSelector:
hardware: rpi5
node-role.kubernetes.io/worker: "true"
containers:
- name: git
@ -15,11 +16,6 @@ spec:
command:
- cat
tty: true
- name: quality-tools
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
command:
- cat
tty: true
- name: kaniko
image: gcr.io/kaniko-project/executor:v1.23.2-debug
command:
@ -27,7 +23,7 @@ spec:
tty: true
resources:
requests:
cpu: "100m"
cpu: "500m"
memory: "1Gi"
limits:
cpu: "1500m"
@ -38,13 +34,7 @@ spec:
environment {
SUITE_NAME = 'data_prepper'
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
SONARQUBE_PROJECT_KEY = 'data_prepper'
SONARQUBE_TOKEN = credentials('sonarqube-token')
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
QUALITY_GATE_IRONBANK_ENFORCE = '1'
QUALITY_GATE_IRONBANK_REQUIRED = '1'
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
}
parameters {
@ -66,75 +56,6 @@ spec:
}
stage('Collect quality evidence') {
steps {
container('quality-tools') {
sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
args=(
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
"-Dsonar.login=${SONARQUBE_TOKEN}"
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
"-Dsonar.sources=services/logging,dockerfiles"
"-Dsonar.inclusions=services/logging/Jenkinsfile.data-prepper,dockerfiles/Dockerfile.data-prepper"
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**"
)
set +e
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
sonar_rc=${PIPESTATUS[0]}
sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}"
host="${SONARQUBE_HOST_URL%/}"
query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')"
sonar_ok=0
if [ -n "${SONARQUBE_TOKEN:-}" ]; then
auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')"
if command -v curl >/dev/null 2>&1; then
curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
elif command -v wget >/dev/null 2>&1; then
wget -qO "${sonar_report}" --header="Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
fi
elif command -v curl >/dev/null 2>&1; then
curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}" && sonar_ok=1
elif command -v wget >/dev/null 2>&1; then
wget -qO "${sonar_report}" "${host}/api/qualitygates/project_status?projectKey=${query}" && sonar_ok=1
fi
if [ "${sonar_ok}" -ne 1 ]; then
cat > "${sonar_report}" <<EOF
{
"status": "ERROR",
"error": "sonarqube query failed"
}
EOF
fi
scan_root=build/data-prepper-supply-chain-scan
rm -rf "${scan_root}"
mkdir -p "${scan_root}/dockerfiles" "${scan_root}/services/logging"
cp dockerfiles/Dockerfile.data-prepper "${scan_root}/dockerfiles/Dockerfile.data-prepper"
cp services/logging/Jenkinsfile.data-prepper "${scan_root}/services/logging/Jenkinsfile.data-prepper"
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL "${scan_root}"
trivy_rc=$?
set -e
printf '%s\n' "${sonar_rc}" > build/sonarqube-analysis.rc
if [ ! -s build/trivy-fs.json ]; then
cat > build/ironbank-compliance.json <<EOF
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
EOF
exit 0
fi
critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
status=ok
compliant=true
if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
status=failed
compliant=false
fi
jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
'{status:$status, compliant:$compliant, category:"image_compliance", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
'''
}
container('git') {
sh '''
set -euo pipefail
@ -206,148 +127,12 @@ EOF
}
}
}
// Stage: packaging sanity checks for the data-prepper suite.
// The embedded sh script runs three checks:
//   - dockerfiles/Dockerfile.data-prepper exists and is non-empty
//   - services/logging/scripts/data_prepper_pipelines.yaml exists and is non-empty
//   - services/logging/kustomization.yaml mentions data-prepper-helmrelease.yaml
// It records each outcome through add_case, then writes
// build/junit-data-prepper.xml, build/test-counts.env and
// build/testcase-status.env, and exits 1 when any check failed.
// The heredoc bodies and their EOF terminators must stay at column 0.
stage('Validation tests') {
steps {
// Runs in the 'git' container of the agent pod.
container('git') {
sh '''#!/usr/bin/env sh
set -eu
mkdir -p build
failures=0
cases=""
dockerfile_present_status="skipped"
pipeline_config_present_status="skipped"
logging_kustomization_includes_data_prepper_status="skipped"
add_case() {
name="$1"
message="$2"
status="passed"
if [ -n "${message}" ]; then
status="failed"
failures=$((failures + 1))
cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'"><failure message="'"${message}"'" /></testcase>'
else
cases="${cases}"'<testcase classname="data_prepper.packaging" name="'"${name}"'" />'
fi
case "${name}" in
dockerfile_present) dockerfile_present_status="${status}" ;;
pipeline_config_present) pipeline_config_present_status="${status}" ;;
logging_kustomization_includes_data_prepper) logging_kustomization_includes_data_prepper_status="${status}" ;;
esac
}
if [ -s dockerfiles/Dockerfile.data-prepper ]; then
add_case "dockerfile_present" ""
else
add_case "dockerfile_present" "dockerfiles/Dockerfile.data-prepper is missing or empty"
fi
if [ -s services/logging/scripts/data_prepper_pipelines.yaml ]; then
add_case "pipeline_config_present" ""
else
add_case "pipeline_config_present" "data_prepper_pipelines.yaml is missing or empty"
fi
kustomization_contents="$(cat services/logging/kustomization.yaml 2>/dev/null || true)"
case "${kustomization_contents}" in
*data-prepper-helmrelease.yaml*) add_case "logging_kustomization_includes_data_prepper" "" ;;
*) add_case "logging_kustomization_includes_data_prepper" "services/logging/kustomization.yaml does not include data-prepper HelmRelease" ;;
esac
cat > build/junit-data-prepper.xml <<EOF
<testsuite name="data_prepper.packaging" tests="3" failures="${failures}" errors="0" skipped="0">
${cases}
</testsuite>
EOF
passed=$((3 - failures))
cat > build/test-counts.env <<EOF
test_passed_count=${passed}
test_failed_count=${failures}
test_error_count=0
test_skipped_count=0
EOF
cat > build/testcase-status.env <<EOF
dockerfile_present_status=${dockerfile_present_status}
pipeline_config_present_status=${pipeline_config_present_status}
logging_kustomization_includes_data_prepper_status=${logging_kustomization_includes_data_prepper_status}
EOF
if [ "${failures}" -ne 0 ]; then
exit 1
fi
'''
}
}
}
// Stage: turn the collected evidence files into a pass/fail decision.
// Reads build/sonarqube-quality-gate.json and build/ironbank-compliance.json
// and exits non-zero when an enforced gate is not passing. Each gate is
// switched on by its QUALITY_GATE_*_ENFORCE variable (default "1").
stage('Enforce quality gate') {
  steps {
    container('git') {
      sh '''
        set -euo pipefail
        apk add --no-cache jq >/dev/null 2>&1 || true
        fail=0

        # enabled VALUE -> success when VALUE is 1/true/yes/on (case-insensitive).
        enabled() {
          case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
            1|true|yes|on) return 0 ;;
            *) return 1 ;;
          esac
        }

        if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
          # `|| true`: with errexit + pipefail a missing or unparsable report
          # would otherwise abort here, before the "missing" status below can
          # be reported and before the supply-chain gate is evaluated.
          sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)"
          [ -n "${sonar_status}" ] || sonar_status="missing"
          case "${sonar_status}" in
            ok|pass|passed|success) ;;
            *)
              echo "sonarqube gate failed: ${sonar_status}" >&2
              fail=1
              ;;
          esac
        fi

        if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
          ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-1}"
          # jq's `//` treats false like null, so `.compliant // empty` can never
          # print "false"; test for null explicitly so the "false" branch below
          # is reachable.
          compliant="$(jq -r 'if .compliant == null then empty else .compliant end' build/ironbank-compliance.json 2>/dev/null || true)"
          supply_status=""
          if [ "${compliant}" = "true" ]; then
            supply_status="ok"
          elif [ "${compliant}" = "false" ]; then
            supply_status="failed"
          else
            # No usable `compliant` flag: fall back to a textual status field.
            supply_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]' || true)"
          fi
          [ -n "${supply_status}" ] || supply_status="missing"
          case "${supply_status}" in
            ok|pass|passed|success|compliant) ;;
            not_applicable|na|n/a)
              if enabled "${ironbank_required}"; then
                echo "supply chain gate required but status=${supply_status}" >&2
                fail=1
              fi
              ;;
            *)
              if enabled "${ironbank_required}"; then
                echo "supply chain gate failed: ${supply_status}" >&2
                fail=1
              else
                echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
              fi
              ;;
          esac
        fi

        exit "${fail}"
      '''
    }
  }
}
stage('Build & Push') {
steps {
container('kaniko') {
withCredentials([usernamePassword(credentialsId: 'harbor-robot-streaming', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
withCredentials([usernamePassword(credentialsId: 'harbor-robot', usernameVariable: 'HARBOR_USERNAME', passwordVariable: 'HARBOR_PASSWORD')]) {
sh '''
set -euo pipefail
IMAGE_TAG="${IMAGE_TAG:-2.8.0}"
PUSH_LATEST="${PUSH_LATEST:-true}"
if [ -z "${HARBOR_REPO:-}" ] || [ "${HARBOR_REPO}" = "registry.bstein.dev/monitoring/data-prepper" ]; then
HARBOR_REPO="registry.bstein.dev/streaming/data-prepper"
fi
@ -441,93 +226,34 @@ EOF
if [ "${status}" != "ok" ]; then
gate_glue_check="failed"
fi
metric_branch_raw="${BRANCH_NAME:-${GIT_BRANCH:-unknown}}"
metric_branch_raw="${metric_branch_raw#origin/}"
metric_branch="$(printf '%s' "${metric_branch_raw}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
metric_build_number="$(printf '%s' "${BUILD_NUMBER:-unknown}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
metric_jenkins_job="$(printf '%s' "${JOB_NAME:-data-prepper}" | jq -Rsa . | sed -e 's/^"//' -e 's/"$//')"
export METRIC_SUITE="${suite}"
export METRIC_BRANCH_RAW="${metric_branch_raw}"
export METRIC_BUILD_NUMBER_RAW="${BUILD_NUMBER:-unknown}"
export METRIC_JENKINS_JOB_RAW="${JOB_NAME:-data-prepper}"
if [ ! -s build/test-counts.env ] || [ ! -s build/testcase-status.env ]; then
cat > build/test-counts.env <<EOF
test_passed_count=0
test_failed_count=0
test_error_count=0
test_skipped_count=1
EOF
cat > build/testcase-status.env <<EOF
dockerfile_present_status=skipped
pipeline_config_present_status=skipped
logging_kustomization_includes_data_prepper_status=skipped
EOF
fi
. build/testcase-status.env
if [ "${dockerfile_present_status}" = "skipped" ] && [ "${pipeline_config_present_status}" = "skipped" ] && [ "${logging_kustomization_includes_data_prepper_status}" = "skipped" ]; then
cat > build/testcase-metrics.prom <<METRICS
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="__no_test_cases__",status="skipped"} 1
METRICS
else
cat > build/testcase-metrics.prom <<METRICS
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::dockerfile_present",status="${dockerfile_present_status}"} 1
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::pipeline_config_present",status="${pipeline_config_present_status}"} 1
platform_quality_gate_test_case_result{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}",test="data_prepper.packaging::logging_kustomization_includes_data_prepper",status="${logging_kustomization_includes_data_prepper_status}"} 1
METRICS
fi
. build/test-counts.env
tests_check="ok"
if [ "$((test_failed_count + test_error_count))" -gt 0 ]; then
tests_check="failed"
fi
cat > build/platform-quality-metrics.prom <<METRICS
cat <<METRICS | curl -fsS -X PUT --data-binary @- "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" >/dev/null || \
echo "warning: metrics push failed for suite=${suite}" >&2
# TYPE platform_quality_gate_runs_total counter
platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
# TYPE data_prepper_quality_gate_tests_total gauge
data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} ${test_passed_count}
data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} ${test_failed_count}
data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} ${test_error_count}
data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} ${test_skipped_count}
data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} 0
# TYPE platform_quality_gate_workspace_line_coverage_percent gauge
# No coverable project source is present in this packaging suite; report full
# non-applicable coverage so rollups do not confuse N/A with uncovered code.
platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 100
platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 0
# TYPE platform_quality_gate_source_lines_over_500_total gauge
platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0
# TYPE platform_quality_gate_build_info gauge
platform_quality_gate_build_info{suite="${suite}",branch="${metric_branch}",build_number="${metric_build_number}",jenkins_job="${metric_jenkins_job}"} 1
# TYPE platform_quality_gate_test_case_result gauge
platform_quality_gate_test_case_result{suite="${suite}",test="__no_test_cases__",status="skipped"} 1
# TYPE data_prepper_quality_gate_checks_total gauge
data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="${tests_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1
# TYPE platform_quality_gate_test_case_result gauge
METRICS
cat build/testcase-metrics.prom >> build/platform-quality-metrics.prom
push_status="$(
curl -sS -o build/pushgateway-response.txt -w '%{http_code}' -X PUT \
--data-binary @build/platform-quality-metrics.prom \
"${gateway}/metrics/job/platform-quality-ci/suite/${suite}" || true
)"
case "${push_status}" in
200|202) ;;
*)
echo "warning: metrics push failed for suite=${suite} status=${push_status}" >&2
cat build/pushgateway-response.txt >&2 || true
;;
esac
'''
}
script {
if (fileExists('build/junit-data-prepper.xml')) {
echo 'JUnit XML generated and archived under build/; Jenkins junit step is not installed on this controller.'
}
}
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
archiveArtifacts artifacts: 'build/**/*.json,build/**/*.xml,build/**/*.txt,build/**/*.rc', allowEmptyArchive: true, fingerprint: true
}
}
}

View File

@ -44,7 +44,8 @@ spec:
path: /var/log/journal
- name: fluentbit-state
emptyDir:
sizeLimit: 1Gi
medium: Memory
sizeLimit: 64Mi
extraVolumeMounts:
- name: runlogjournal
mountPath: /run/log/journal

View File

@ -18,7 +18,6 @@ resources:
- oneoffs/opensearch-ism-job.yaml
- oneoffs/opensearch-dashboards-setup-job.yaml
- oneoffs/opensearch-observability-setup-job.yaml
- opensearch-prune-cronjob.yaml
- fluent-bit-helmrelease.yaml
- node-log-rotation-daemonset.yaml
- node-image-gc-rpi4-daemonset.yaml
@ -46,12 +45,6 @@ configMapGenerator:
- node_image_prune_rpi5.sh=scripts/node_image_prune_rpi5.sh
options:
disableNameSuffixHash: true
- name: opensearch-prune-script
namespace: logging
files:
- prune.py=scripts/opensearch_prune.py
options:
disableNameSuffixHash: true
- name: opensearch-observability-script
namespace: logging
files:

View File

@ -12,8 +12,6 @@ spec:
type: RollingUpdate
template:
metadata:
annotations:
logging.bstein.dev/node-log-rotation-rev: "2026-04-27-3"
labels:
app: node-log-rotation
spec:

View File

@ -1,48 +0,0 @@
# services/logging/opensearch-prune-cronjob.yaml
# Size-based pruning of OpenSearch log indices: runs /scripts/prune.py from the
# opensearch-prune-script ConfigMap. The CronJob is created suspended, so no
# Jobs are scheduled until `suspend` is flipped to false.
apiVersion: batch/v1
kind: CronJob
metadata:
  namespace: logging
  name: opensearch-prune
spec:
  # Daily at 03:23.
  schedule: '23 3 * * *'
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 2
      template:
        spec:
          restartPolicy: OnFailure
          # Pinned to rpi5 workers, both via nodeSelector and required affinity.
          nodeSelector:
            node-role.kubernetes.io/worker: "true"
            hardware: rpi5
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                  - matchExpressions:
                      - key: hardware
                        operator: In
                        values: [rpi5]
          volumes:
            - name: scripts
              configMap:
                name: opensearch-prune-script
          containers:
            - name: prune
              image: python:3.11-alpine
              command:
                - python
                - /scripts/prune.py
              volumeMounts:
                - name: scripts
                  mountPath: /scripts
              env:
                - name: OPENSEARCH_URL
                  value: 'http://opensearch-master.logging.svc.cluster.local:9200'
                # 1099511627776 = 1024**4 bytes.
                - name: LOG_LIMIT_BYTES
                  value: "1099511627776"
                - name: LOG_INDEX_PATTERNS
                  value: 'kube-*,journald-*,trace-analytics-*'

View File

@ -99,24 +99,4 @@ if [ "${changed}" -eq 1 ]; then
fi
fi
# Free space on constrained log volumes.
#
# For each candidate log root that contains a pods/ directory, read the
# filesystem usage percentage from `df -P`. When usage is 75% or higher,
# truncate to zero every regular file under pods/ larger than 1M whose name
# matches `[1-9]*.log` or `*.log.20*`, then delete broken symlinks under
# containers/ (if that directory exists). Roots whose usage cannot be read as
# a non-negative integer are skipped. Takes no arguments; failures of the
# find commands are ignored.
trim_constrained_pod_logs() {
  local base usage
  for base in /host/mnt/astraios/var/log /host/var/log.hdd; do
    if [ ! -d "${base}/pods" ]; then
      continue
    fi
    usage="$(df -P "${base}" | awk 'NR==2 {gsub(/%/, "", $5); print $5}')"
    # Guard the numeric test: a non-integer value (for example "-") makes
    # `[ ... -lt 75 ]` fail with an error, which would read as "not below the
    # threshold" and trigger truncation on a volume of unknown usage.
    case "${usage}" in
      ''|*[!0-9]*) continue ;;
    esac
    if [ "${usage}" -lt 75 ]; then
      continue
    fi
    find "${base}/pods" -type f \( -name '[1-9]*.log' -o -name '*.log.20*' \) -size +1M -print -exec truncate -s 0 {} \; 2>/dev/null || true
    if [ -d "${base}/containers" ]; then
      find "${base}/containers" -xtype l -print -delete 2>/dev/null || true
    fi
  done
}
while true; do
trim_constrained_pod_logs
sleep 600
done
sleep infinity

View File

@ -1,77 +0,0 @@
import json
import os
import re
import sys
import urllib.error
import urllib.request
# Runtime configuration, read from the environment when the script starts.
# Base URL of the OpenSearch HTTP endpoint; trailing "/" characters are stripped.
os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/")
# Size limit in bytes for the matched indices; defaults to 1024**4 when unset.
limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4)))
# Comma-separated index patterns to inspect; surrounding whitespace is trimmed
# and empty entries are dropped.
patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()]
# Multipliers (powers of 1024) for the size suffixes understood by parse_size.
UNITS = {
    "b": 1,
    "kb": 1024,
    "mb": 1024**2,
    "gb": 1024**3,
    "tb": 1024**4,
}


def parse_size(value: str) -> int:
    """Convert a human-readable size such as ``"1.5gb"`` to a byte count.

    Args:
        value: Size text made of a decimal number immediately followed by a
            unit from ``UNITS`` (case-insensitive, surrounding whitespace
            ignored), e.g. ``"208b"`` or ``"10.2kb"``.

    Returns:
        The size in bytes, truncated to an integer. Returns 0 for empty
        input, ``"-"``, ``"0"``, an unknown unit, or any text that does not
        parse as ``<number><unit>`` -- including malformed numbers such as
        ``"1.2.3gb"``.
    """
    if not value:
        return 0
    text = value.strip().lower()
    if text in ("-", "0"):
        return 0
    match = re.match(r"^([0-9.]+)([a-z]+)$", text)
    if not match:
        return 0
    try:
        # The character class also admits strings like "1.2.3" or "..";
        # treat those as unparseable instead of letting float() raise.
        number = float(match.group(1))
    except ValueError:
        return 0
    unit = match.group(2)
    if unit not in UNITS:
        return 0
    return int(number * UNITS[unit])
def request_json(path: str):
    """Fetch ``path`` from the OpenSearch endpoint and return the parsed JSON.

    ``path`` is appended verbatim to the module-level ``os_url``. The request
    uses a 30-second timeout and the body is decoded as UTF-8. Errors raised
    by ``urllib`` (for example ``urllib.error.HTTPError``) are not caught
    here.
    """
    with urllib.request.urlopen(f"{os_url}{path}", timeout=30) as resp:
        body = resp.read().decode("utf-8")
    return json.loads(body)
def delete_index(index: str) -> None:
    """Send an HTTP DELETE for ``index`` and print a confirmation line.

    The target URL is the module-level ``os_url`` followed by ``/`` and the
    index name; the request uses a 30-second timeout. Errors raised by
    ``urllib`` are not caught here.
    """
    request = urllib.request.Request(f"{os_url}/{index}", method="DELETE")
    with urllib.request.urlopen(request, timeout=30) as resp:
        resp.read()  # drain the response; its content is not used
    print(f"deleted {index}")
# --- Collect candidate indices -------------------------------------------
# Indices are keyed by name so that overlapping patterns neither count an
# index's size twice nor queue it for deletion twice (the second DELETE
# would fail with HTTP 404 and abort the run).
found = {}
for pattern in patterns:
    try:
        data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date")
    except urllib.error.HTTPError as exc:
        # A 404 for this pattern is skipped; any other HTTP error is fatal.
        if exc.code == 404:
            continue
        raise
    for item in data:
        index = item.get("index")
        # Skip entries without a name and dot-prefixed indices.
        if not index or index.startswith("."):
            continue
        size = parse_size(item.get("store.size", ""))
        created = int(item.get("creation.date", "0") or 0)
        found[index] = {"index": index, "size": size, "created": created}
indices = list(found.values())

# --- Enforce the size limit ------------------------------------------------
total = sum(item["size"] for item in indices)
print(f"total_log_bytes={total}")
if total <= limit_bytes:
    print("within limit")
    sys.exit(0)

# Delete from the smallest creation.date upward until the total fits.
indices.sort(key=lambda item: item["created"])
for item in indices:
    if total <= limit_bytes:
        break
    delete_index(item["index"])
    total -= item["size"]
print(f"remaining_log_bytes={total}")

View File

@ -764,15 +764,6 @@ spec:
spec:
template:
spec:
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values: ["titan-13", "titan-15", "titan-17", "titan-19"]
containers:
- name: tika
env:

View File

@ -14,7 +14,6 @@ resources:
- serverstransport.yaml
- ingressroute.yaml
- oneoffs/mailu-sync-job.yaml
- mailu-sync-cronjob.yaml
- front-lb.yaml
configMapGenerator:

View File

@ -1,93 +0,0 @@
# services/mailu/mailu-sync-cronjob.yaml
# CronJob that runs /app/sync.py (mounted from the mailu-sync-script ConfigMap)
# in a python:3.11-alpine container, with credentials injected by Vault agent.
apiVersion: batch/v1
kind: CronJob
metadata:
name: mailu-sync-nightly
namespace: mailu-mailserver
labels:
atlas.bstein.dev/glue: "true"
spec:
# "30 4 * * *" = daily at 04:30. With suspend: true no Jobs are created from
# the schedule until the flag is cleared.
schedule: "30 4 * * *"
suspend: true
# Do not start a new run while a previous one is still active.
concurrencyPolicy: Forbid
jobTemplate:
spec:
template:
metadata:
annotations:
# Vault agent injection using the mailu-mailserver role. Each
# agent-inject-secret / agent-inject-template pair below renders exactly one
# field of a KV secret (database, username, password, client-id, ...).
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "mailu-mailserver"
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret"
vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials"
vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: |
{{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}}
vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret"
vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: |
{{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}}
spec:
restartPolicy: OnFailure
# Restrict scheduling to arm64 worker nodes.
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
serviceAccountName: mailu-vault-sync
containers:
- name: mailu-sync
image: python:3.11-alpine
imagePullPolicy: IfNotPresent
# The shell sources the Vault env helper, installs the Python dependencies on
# every run (pip stdout goes to /tmp/pip.log), and only then runs sync.py.
command: ["/bin/sh", "-c"]
args:
- |
set -euo pipefail
. /vault/scripts/mailu_vault_env.sh
pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \
&& python /app/sync.py
# Static, non-secret settings passed to the container as environment variables.
env:
- name: KEYCLOAK_BASE_URL
value: http://keycloak.sso.svc.cluster.local
- name: KEYCLOAK_REALM
value: atlas
- name: MAILU_DOMAIN
value: bstein.dev
- name: MAILU_DEFAULT_QUOTA
value: "20000000000"
- name: MAILU_SYSTEM_USERS
value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev"
- name: MAILU_DB_HOST
value: postgres-service.postgres.svc.cluster.local
- name: MAILU_DB_PORT
value: "5432"
volumeMounts:
- name: sync-script
mountPath: /app/sync.py
subPath: sync.py
- name: vault-scripts
mountPath: /vault/scripts
readOnly: true
resources:
requests:
cpu: 50m
memory: 128Mi
limits:
cpu: 200m
memory: 256Mi
# Both volumes are ConfigMap-backed: the sync script (mode 0444) and the
# Vault env helper scripts (mode 0555).
volumes:
- name: sync-script
configMap:
name: mailu-sync-script
defaultMode: 0444
- name: vault-scripts
configMap:
name: mailu-vault-env
defaultMode: 0555

View File

@ -1,5 +1,3 @@
"""HTTP debounce wrapper for triggering the Mailu Keycloak sync job."""
import http.server
import json
import os

View File

@ -18,15 +18,13 @@ spec:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
maintenance.bstein.dev/restart-rev: "20260413-jenkins-api-2"
maintenance.bstein.dev/restart-rev: "20260207-2"
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "maintenance"
vault.hashicorp.com/agent-inject-secret-ariadne-env.sh: "kv/data/atlas/maintenance/ariadne-db"
vault.hashicorp.com/agent-inject-template-ariadne-env.sh: |
{{ with secret "kv/data/atlas/maintenance/ariadne-db" }}
export ARIADNE_DATABASE_URL="{{ .Data.data.database_url }}"
export JENKINS_API_USER="{{ .Data.data.jenkins_api_user }}"
export JENKINS_API_TOKEN="{{ .Data.data.jenkins_api_token }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
@ -106,36 +104,6 @@ spec:
nodeSelector:
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: atlas.bstein.dev/spillover
operator: DoesNotExist
- weight: 95
preference:
matchExpressions:
- key: kubernetes.io/hostname
operator: NotIn
values:
- titan-13
- titan-15
- titan-17
- titan-19
- weight: 90
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi5"]
- weight: 50
preference:
matchExpressions:
- key: hardware
operator: In
values: ["rpi4"]
containers:
- name: ariadne
image: registry.bstein.dev/bstein/ariadne:latest
@ -340,9 +308,9 @@ spec:
- name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
value: "0 */4 * * *"
- name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
value: "0 0 1 1 *"
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_VAULT_OIDC
value: "0 0 1 1 *"
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
value: "*/5 * * * *"
- name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
@ -377,12 +345,10 @@ spec:
value: "15"
- name: ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH
value: "*/30 * * * *"
- name: ARIADNE_SCHEDULE_JENKINS_BUILD_WEATHER
value: "*/10 * * * *"
- name: JENKINS_BASE_URL
value: https://ci.bstein.dev
- name: JENKINS_API_TIMEOUT_SEC
value: "10"
- name: ARIADNE_SCHEDULE_METIS_K3S_TOKEN_SYNC
value: "11 */6 * * *"
- name: ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE
value: "*/15 * * * *"
- name: ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP
value: "45 */6 * * *"
- name: JENKINS_WORKSPACE_NAMESPACE
@ -392,7 +358,7 @@ spec:
- name: JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS
value: "24"
- name: JENKINS_WORKSPACE_CLEANUP_DRY_RUN
value: "false"
value: "true"
- name: JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN
value: "20"
- name: METRICS_PATH

View File

@ -16,13 +16,6 @@ rules:
- apiGroups: [""]
resources:
- pods
verbs:
- get
- list
- watch
- delete
- apiGroups: [""]
resources:
- persistentvolumeclaims
- persistentvolumes
verbs:

View File

@ -1,53 +0,0 @@
# services/maintenance/image-sweeper-cronjob.yaml
# CronJob that runs /scripts/node_image_sweeper.sh (from the
# node-image-sweeper-script ConfigMap) once per invocation (ONE_SHOT=true)
# in a privileged container with the node's root filesystem mounted at /host.
apiVersion: batch/v1
kind: CronJob
metadata:
name: image-sweeper
namespace: maintenance
spec:
# "30 4 * * 0" = Sundays at 04:30. With suspend: true no Jobs are created
# from the schedule until the flag is cleared.
schedule: "30 4 * * 0"
suspend: true
concurrencyPolicy: Forbid
# Keep only the two most recent successful and failed Jobs.
successfulJobsHistoryLimit: 2
failedJobsHistoryLimit: 2
jobTemplate:
spec:
template:
spec:
serviceAccountName: node-image-sweeper
restartPolicy: OnFailure
# Restrict scheduling to Linux arm64 worker nodes.
nodeSelector:
kubernetes.io/os: linux
kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true"
# Tolerate the control-plane / master NoSchedule taints.
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: image-sweeper
image: python:3.12.9-alpine3.20
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
env:
- name: ONE_SHOT
value: "true"
# Runs as root and privileged; the host's / is exposed read-write at /host.
securityContext:
privileged: true
runAsUser: 0
volumeMounts:
- name: host-root
mountPath: /host
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: host-root
hostPath:
path: /
- name: script
configMap:
name: node-image-sweeper-script
defaultMode: 0555

View File

@ -36,29 +36,11 @@ spec:
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImagePolicy
metadata:
name: metis-amd64
name: metis
namespace: maintenance
spec:
imageRepositoryRef:
name: metis
filterTags:
pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
extract: '$version'
policy:
semver:
range: ">=0.1.0-0"
---
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImagePolicy
metadata:
name: metis-arm64
namespace: maintenance
spec:
imageRepositoryRef:
name: metis
filterTags:
pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
extract: '$version'
policy:
semver:
range: ">=0.1.0-0"
@ -77,29 +59,11 @@ spec:
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImagePolicy
metadata:
name: metis-sentinel-amd64
name: metis-sentinel
namespace: maintenance
spec:
imageRepositoryRef:
name: metis-sentinel
filterTags:
pattern: '^(?P<version>0\.1\.0-\d+)-amd64$'
extract: '$version'
policy:
semver:
range: ">=0.1.0-0"
---
apiVersion: image.toolkit.fluxcd.io/v1beta2
kind: ImagePolicy
metadata:
name: metis-sentinel-arm64
namespace: maintenance
spec:
imageRepositoryRef:
name: metis-sentinel
filterTags:
pattern: '^(?P<version>0\.1\.0-\d+)-arm64$'
extract: '$version'
policy:
semver:
range: ">=0.1.0-0"

View File

@ -26,7 +26,6 @@ resources:
- metis-deployment.yaml
- soteria-deployment.yaml
- oneoffs/ariadne-migrate-job.yaml
- oneoffs/titan-24-rootfs-sweep-job.yaml
- ariadne-service.yaml
- soteria-service.yaml
- disable-k3s-traefik-daemonset.yaml
@ -48,18 +47,12 @@ resources:
- metis-ingress.yaml
images:
- name: registry.bstein.dev/bstein/ariadne
newTag: 0.1.0-188 # {"$imagepolicy": "maintenance:ariadne:tag"}
newTag: 0.1.0-22 # {"$imagepolicy": "maintenance:ariadne:tag"}
- name: registry.bstein.dev/bstein/metis
newTag: 0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64:tag"}
newTag: 0.1.0-9-amd64
- name: registry.bstein.dev/bstein/soteria
newTag: 0.1.0-36 # {"$imagepolicy": "maintenance:soteria:tag"}
newTag: 0.1.0-35 # {"$imagepolicy": "maintenance:soteria:tag"}
configMapGenerator:
- name: metis-inventory
namespace: maintenance
files:
- inventory.yaml=metis-inventory.yaml
options:
disableNameSuffixHash: true
- name: disable-k3s-traefik-script
namespace: maintenance
files:

View File

@ -2,12 +2,12 @@
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: recovery-tls
name: sentinel-tls
namespace: maintenance
spec:
secretName: recovery-tls
secretName: sentinel-tls
issuerRef:
kind: ClusterIssuer
name: letsencrypt
dnsNames:
- recovery.bstein.dev
- sentinel.bstein.dev

View File

@ -8,21 +8,19 @@ data:
METIS_BIND_ADDR: :8080
METIS_INVENTORY_PATH: /app/inventory.titan-rpi4.yaml
METIS_DATA_DIR: /var/lib/metis
METIS_DEFAULT_FLASH_HOST: titan-20
METIS_FLASH_HOSTS: titan-20,titan-21,titan-22,titan-24,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
METIS_LOCAL_HOST: titan-20
METIS_DEFAULT_FLASH_HOST: titan-22
METIS_FLASH_HOSTS: titan-22,titan-24,titan-20,titan-21,titan-19,titan-17,titan-15,titan-14,titan-12,titan-11,titan-10,titan-09,titan-08,titan-07,titan-06,titan-05,titan-04,titan-0c,titan-0b,titan-0a
METIS_LOCAL_HOST: titan-22
METIS_ALLOWED_GROUPS: admin,maintenance
METIS_MAX_DEVICE_BYTES: "1000000000000"
METIS_NAMESPACE: maintenance
METIS_REMOTE_POD_TIMEOUT_SEC: "14400"
METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-amd64"}
METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-arm64"}
METIS_RUNNER_IMAGE_AMD64: registry.bstein.dev/bstein/metis:0.1.0-23-amd64
METIS_RUNNER_IMAGE_ARM64: registry.bstein.dev/bstein/metis:0.1.0-23-arm64
METIS_HARBOR_REGISTRY: registry.bstein.dev
METIS_HARBOR_PROJECT: metis
METIS_HARBOR_API_BASE: https://registry.bstein.dev/api/v2.0
METIS_HARBOR_USERNAME: admin
METIS_HOST_TMP_DIR: /var/tmp/metis-flash-test
METIS_REMOTE_WORKSPACE_DIR: /var/tmp/metis-workspace
METIS_HOST_TMP_DIR: /tmp/metis-flash-test
METIS_SENTINEL_PUSH_URL: http://metis.maintenance.svc.cluster.local/internal/sentinel/snapshot
METIS_SENTINEL_INTERVAL_SEC: "1800"
METIS_SENTINEL_NSENTER: "1"

View File

@ -2,7 +2,7 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: metis-data-longhorn
name: metis-data
namespace: maintenance
spec:
accessModes:
@ -10,4 +10,4 @@ spec:
resources:
requests:
storage: 40Gi
storageClassName: longhorn
storageClassName: local-path

View File

@ -18,7 +18,7 @@ spec:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/path: "/metrics"
metis.bstein.dev/config-rev: "2026-04-24-01"
metis.bstein.dev/config-rev: "2026-04-06-02"
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "maintenance"
@ -27,15 +27,9 @@ spec:
{{ with secret "kv/data/atlas/maintenance/metis-runtime" }}
export METIS_K3S_TOKEN="{{ .Data.data.k3s_token }}"
{{ end }}
vault.hashicorp.com/agent-inject-secret-metis-harbor-env.sh: "kv/data/atlas/harbor/harbor-core"
vault.hashicorp.com/agent-inject-template-metis-harbor-env.sh: |
{{ with secret "kv/data/atlas/harbor/harbor-core" }}
export METIS_HARBOR_PASSWORD="{{ .Data.data.harbor_admin_password }}"
{{ end }}
vault.hashicorp.com/agent-inject-secret-metis-ssh-env.sh: "kv/data/atlas/maintenance/metis-ssh-keys"
vault.hashicorp.com/agent-inject-template-metis-ssh-env.sh: |
{{ with secret "kv/data/atlas/maintenance/metis-ssh-keys" }}
export METIS_SSH_KEY_BASTION="{{ or .Data.data.bastion_pub .Data.data.brad_pub "" }}"
export METIS_SSH_KEY_BRAD="{{ .Data.data.brad_pub }}"
export METIS_SSH_KEY_ANANKE_TETHYS="{{ or .Data.data.ananke_tethys_pub .Data.data.hecate_tethys_pub "" }}"
export METIS_SSH_KEY_ANANKE_DB="{{ or .Data.data.ananke_db_pub .Data.data.hecate_db_pub "" }}"
@ -43,31 +37,10 @@ spec:
spec:
serviceAccountName: metis
terminationGracePeriodSeconds: 30
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- arm64
- key: longhorn-host
operator: In
values:
- "true"
- key: node-role.kubernetes.io/worker
operator: In
values:
- "true"
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: hardware
operator: In
values:
- rpi5
nodeSelector:
kubernetes.io/hostname: titan-22
kubernetes.io/arch: amd64
node-role.kubernetes.io/accelerator: "true"
containers:
- name: metis
image: registry.bstein.dev/bstein/metis:latest
@ -76,7 +49,6 @@ spec:
args:
- >-
. /vault/secrets/metis-runtime-env.sh
&& . /vault/secrets/metis-harbor-env.sh
&& . /vault/secrets/metis-ssh-env.sh
&& exec metis serve
envFrom:
@ -100,9 +72,6 @@ spec:
periodSeconds: 5
timeoutSeconds: 2
volumeMounts:
- name: metis-inventory
mountPath: /etc/metis
readOnly: true
- name: metis-data
mountPath: /var/lib/metis
- name: host-dev
@ -124,13 +93,9 @@ spec:
privileged: true
runAsUser: 0
volumes:
- name: metis-inventory
configMap:
name: metis-inventory
defaultMode: 0444
- name: metis-data
persistentVolumeClaim:
claimName: metis-data-longhorn
claimName: metis-data
- name: host-dev
hostPath:
path: /dev

View File

@ -12,10 +12,10 @@ metadata:
spec:
ingressClassName: traefik
tls:
- hosts: ["recovery.bstein.dev"]
secretName: recovery-tls
- hosts: ["sentinel.bstein.dev"]
secretName: sentinel-tls
rules:
- host: recovery.bstein.dev
- host: sentinel.bstein.dev
http:
paths:
- path: /

View File

@ -1,150 +0,0 @@
# services/maintenance/metis-inventory.yaml
# Metis node inventory: `classes` describe an OS image / k3s version per
# hardware class, and each entry in `nodes` refers to one class by name.
# ${...} placeholders are presumably substituted from environment variables
# when the inventory is loaded - TODO confirm against the metis loader.
classes:
- name: rpi5-ubuntu-worker
arch: arm64
os: ubuntu-24.04
image: ${METIS_IMAGE_RPI5_UBUNTU_WORKER}
checksum: ${METIS_IMAGE_RPI5_UBUNTU_WORKER_SHA256}
k3s_version: v1.33.3+k3s1
default_labels:
hardware: rpi5
node-role.kubernetes.io/worker: "true"
# NOTE(review): rpi4-armbian-worker uses the same image/checksum variables
# (METIS_IMAGE_RPI4_ARMBIAN_LONGHORN*) as rpi4-armbian-longhorn below -
# confirm this sharing is intentional.
- name: rpi4-armbian-worker
arch: arm64
os: armbian-noble
image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
k3s_version: v1.31.5+k3s1
default_labels:
hardware: rpi4
node-role.kubernetes.io/worker: "true"
- name: rpi4-armbian-longhorn
arch: arm64
os: armbian-noble
image: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN}
checksum: ${METIS_IMAGE_RPI4_ARMBIAN_LONGHORN_SHA256}
k3s_version: v1.31.5+k3s1
default_labels:
hardware: rpi4
node-role.kubernetes.io/worker: "true"
# Every node joins as a k3s agent against https://192.168.22.7:6443 and gets
# the same three authorized SSH keys.
nodes:
- name: titan-10
class: rpi5-ubuntu-worker
hostname: titan-10
ip: 192.168.22.36
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: ubuntu
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
- name: titan-12
class: rpi4-armbian-worker
hostname: titan-12
ip: 192.168.22.40
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
# titan-16 is the only node with a usb_scratch section: an ext4 volume
# labelled titan-16-scratch at /mnt/scratch with two bind targets.
- name: titan-16
class: rpi4-armbian-worker
hostname: titan-16
ip: 192.168.22.44
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
usb_scratch:
mountpoint: /mnt/scratch
label: titan-16-scratch
fs: ext4
bind_targets:
- /var/lib/rancher
- /var/log
# The rpi4-armbian-longhorn nodes (titan-13/15/17/19) each list two ext4
# longhorn_disks, identified by UUID, mounted at /mnt/astreae and /mnt/asteria.
- name: titan-13
class: rpi4-armbian-longhorn
hostname: titan-13
ip: 192.168.22.41
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 6031fa8b-f28c-45c3-b7bc-6133300e07c6
fs: ext4
- mountpoint: /mnt/asteria
uuid: cbd4989d-62b5-4741-8b2a-28fdae259cae
fs: ext4
- name: titan-15
class: rpi4-armbian-longhorn
hostname: titan-15
ip: 192.168.22.43
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: f3362f14-5822-449f-944b-ac570b5cd615
fs: ext4
- mountpoint: /mnt/asteria
uuid: 9c5316e6-f847-4884-b502-11f2d0d15d6f
fs: ext4
- name: titan-17
class: rpi4-armbian-longhorn
hostname: titan-17
ip: 192.168.22.45
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 1fecdade-08b0-49cb-9ae3-be6c188b0a96
fs: ext4
- mountpoint: /mnt/asteria
uuid: 2fe9f613-d372-47ca-b84f-82084e4edda0
fs: ext4
- name: titan-19
class: rpi4-armbian-longhorn
hostname: titan-19
ip: 192.168.22.47
k3s_role: agent
k3s_url: https://192.168.22.7:6443
k3s_token: ${METIS_K3S_TOKEN}
ssh_user: atlas
ssh_authorized_keys:
- ${METIS_SSH_KEY_BRAD}
- ${METIS_SSH_KEY_ANANKE_TETHYS}
- ${METIS_SSH_KEY_ANANKE_DB}
longhorn_disks:
- mountpoint: /mnt/astreae
uuid: 4890abb9-dda2-4f4f-9c0f-081ee82849cf
fs: ext4
- mountpoint: /mnt/asteria
uuid: 2b4ea28d-b0e6-4fa3-841b-cd7067ae9153
fs: ext4

View File

@ -1,55 +0,0 @@
# services/maintenance/metis-k3s-token-sync-cronjob.yaml
# CronJob that copies the k3s server token from the host filesystem of
# titan-0a into Vault at kv/atlas/maintenance/metis-runtime (key k3s_token).
apiVersion: batch/v1
kind: CronJob
metadata:
name: metis-k3s-token-sync
namespace: maintenance
spec:
# "11 */6 * * *" = minute 11 of every sixth hour.
schedule: "11 */6 * * *"
concurrencyPolicy: Forbid
successfulJobsHistoryLimit: 1
failedJobsHistoryLimit: 2
jobTemplate:
spec:
template:
spec:
serviceAccountName: metis-token-sync
restartPolicy: OnFailure
# Pinned to titan-0a by nodeName, because the token is read from that
# node's /var/lib/rancher/k3s/server via the hostPath volume below.
nodeName: titan-0a
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
containers:
- name: sync
image: hashicorp/vault:1.17.6
imagePullPolicy: IfNotPresent
# The script reads the token file (newlines stripped), logs in to Vault with
# the pod's service-account JWT using role VAULT_K8S_ROLE, then writes the
# token with `vault kv put`.
# NOTE(review): `vault kv put` writes only the supplied key as the new secret
# version - confirm no other keys are expected at this path.
command:
- /bin/sh
- -c
args:
- |
set -eu
token="$(tr -d '\n' < /host/var/lib/rancher/k3s/server/token)"
jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
VAULT_TOKEN="$(vault write -field=token auth/kubernetes/login role="${VAULT_K8S_ROLE}" jwt="${jwt}")"
export VAULT_TOKEN
vault kv put kv/atlas/maintenance/metis-runtime k3s_token="${token}"
env:
- name: VAULT_ADDR
value: http://vault.vault.svc.cluster.local:8200
- name: VAULT_K8S_ROLE
value: maintenance-metis-token-sync
# Runs as root (UID 0); presumably needed to read the host token file - TODO confirm.
securityContext:
runAsUser: 0
volumeMounts:
- name: k3s-server
mountPath: /host/var/lib/rancher/k3s/server
readOnly: true
volumes:
- name: k3s-server
hostPath:
path: /var/lib/rancher/k3s/server

View File

@ -12,7 +12,6 @@ rules:
- list
- watch
- delete
- patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role

View File

@ -10,8 +10,6 @@ spec:
app: metis-sentinel-amd64
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 25%
template:
metadata:
labels:
@ -31,7 +29,7 @@ spec:
kubernetes.io/arch: amd64
containers:
- name: metis-sentinel
image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-amd64 # {"$imagepolicy": "maintenance:metis-sentinel-amd64"}
image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-amd64
imagePullPolicy: Always
envFrom:
- configMapRef:

View File

@ -10,8 +10,6 @@ spec:
app: metis-sentinel-arm64
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 25%
template:
metadata:
labels:
@ -31,7 +29,7 @@ spec:
kubernetes.io/arch: arm64
containers:
- name: metis-sentinel
image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-103-arm64 # {"$imagepolicy": "maintenance:metis-sentinel-arm64"}
image: registry.bstein.dev/bstein/metis-sentinel:0.1.0-0-arm64
imagePullPolicy: Always
envFrom:
- configMapRef:

View File

@ -6,7 +6,7 @@ metadata:
namespace: maintenance
annotations:
prometheus.io/scrape: "true"
prometheus.io/port: "8080"
prometheus.io/port: "80"
prometheus.io/path: "/metrics"
spec:
type: ClusterIP

View File

@ -74,7 +74,7 @@ spec:
args:
- --provider=oidc
- --config=/vault/secrets/oidc-config
- --redirect-url=https://recovery.bstein.dev/oauth2/callback
- --redirect-url=https://sentinel.bstein.dev/oauth2/callback
- --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
- --scope=openid profile email groups
- --email-domain=*
@ -96,7 +96,7 @@ spec:
- --approval-prompt=auto
- --skip-jwt-bearer-tokens=true
- --oidc-groups-claim=groups
- --cookie-domain=recovery.bstein.dev
- --cookie-domain=sentinel.bstein.dev
ports:
- containerPort: 4180
name: http

View File

@ -1,70 +0,0 @@
# services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml
# One-off emergency cleanup for titan-24 rootfs pressure.
# Safe to delete the finished Job/pod after it succeeds.
# Runs /scripts/node_image_sweeper.sh once (ONE_SHOT=true) in a privileged
# container on titan-24 with the node's root filesystem mounted at /host.
apiVersion: batch/v1
kind: Job
metadata:
name: titan-24-rootfs-sweep
namespace: maintenance
annotations:
# Flux force annotation: lets the Job be recreated when an in-place update
# of its immutable fields is rejected.
kustomize.toolkit.fluxcd.io/force: "true"
spec:
# Up to 6 retries; the finished Job is garbage-collected after one hour.
backoffLimit: 6
ttlSecondsAfterFinished: 3600
template:
metadata:
labels:
app: titan-24-rootfs-sweep
spec:
restartPolicy: OnFailure
nodeSelector:
kubernetes.io/hostname: titan-24
# Tolerate not-ready / unreachable taints so the pod can still be scheduled
# onto the node; NoExecute taints are tolerated for 300 seconds.
tolerations:
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoSchedule
- key: node.kubernetes.io/not-ready
operator: Exists
effect: NoExecute
tolerationSeconds: 300
- key: node.kubernetes.io/unreachable
operator: Exists
effect: NoExecute
tolerationSeconds: 300
containers:
- name: sweep
image: python:3.12.9-alpine3.20
command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
# Sweeper tuning. The usage thresholds are set to 0, presumably so the sweep
# runs regardless of current disk usage - TODO confirm against the script.
env:
- name: ONE_SHOT
value: "true"
- name: HIGH_USAGE_PERCENT
value: "0"
- name: EMERGENCY_USAGE_PERCENT
value: "0"
- name: LOG_RETENTION_DAYS
value: "1"
- name: ORPHAN_POD_RETENTION_DAYS
value: "0"
- name: JOURNAL_MAX_SIZE
value: "100M"
# Runs as root and privileged; the host's / is exposed read-write at /host.
securityContext:
privileged: true
runAsUser: 0
volumeMounts:
- name: host-root
mountPath: /host
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: host-root
hostPath:
path: /
- name: script
configMap:
name: node-image-sweeper-script
defaultMode: 0555

Some files were not shown because too many files have changed in this diff Show More