Compare commits
414 Commits
main
...
feature/at
| Author | SHA1 | Date | |
|---|---|---|---|
| 6e4cafa3df | |||
| 41021c472b | |||
| 17afb0bb55 | |||
| 1e0e73a28f | |||
| af01a620c3 | |||
| 0edc513e2e | |||
| 3659c9c07b | |||
| 11d58dccb7 | |||
| 5bcff5f405 | |||
| f5dcea860e | |||
| a1e90f4600 | |||
| f04f032721 | |||
| 083999c84c | |||
| dc62a84e2e | |||
| 31ffaedf2a | |||
| b2d1dc4e3f | |||
|
|
271a941d89 | ||
|
|
fa30a2cade | ||
| f71d0bc3f3 | |||
|
|
19a3207eac | ||
| 2d5107f7e2 | |||
| a091ea75a3 | |||
| 95dabf5df8 | |||
|
|
311cec8adf | ||
|
|
b18e355412 | ||
|
|
80057210fc | ||
|
|
7a1e99a95e | ||
|
|
ace86ad736 | ||
|
|
2a4deb6dd1 | ||
|
|
eee5456921 | ||
| f86d3a4c00 | |||
| a6b77c68f0 | |||
| 9599b4c975 | |||
| df96c06fa2 | |||
| e575e6cb1e | |||
|
|
bca66c5d71 | ||
| b2affe091d | |||
|
|
6c7f2112c2 | ||
| a4874163ec | |||
| 079f8efbb9 | |||
| 95228b75ab | |||
| 9e75bf0b42 | |||
| b2841985ef | |||
| 9553995ba5 | |||
| e840777668 | |||
| 718a1ca312 | |||
| 55f0347b70 | |||
| f77e13b2cb | |||
| fd2b10d00d | |||
| 4209299a40 | |||
| 1804ff06c6 | |||
| 4b5913827d | |||
| 80548a2e82 | |||
|
|
29756b1e62 | ||
| 4bc91c40f6 | |||
| 1260d18cdf | |||
| 47efd0be06 | |||
|
|
fa410c8f1e | ||
| 0ed75718c2 | |||
| 50ff59a33b | |||
|
|
9d9bcd1988 | ||
|
|
c96749bab6 | ||
| 5e239accbd | |||
|
|
c50298c8fe | ||
|
|
3fcab34b7d | ||
| e223ef8e76 | |||
| 7f72683242 | |||
| eeb8475848 | |||
| 839b79696c | |||
| 920f146efb | |||
|
|
c2c5474bc8 | ||
|
|
eab7ed5cff | ||
|
|
22eb1a1159 | ||
| d7c1ecd098 | |||
|
|
96288c9fdd | ||
|
|
a71bf7d9d5 | ||
| 533baa6d0c | |||
|
|
cee353e305 | ||
|
|
436d24ea70 | ||
|
|
6fb80e37e8 | ||
|
|
132e73100f | ||
|
|
fe8cc40903 | ||
|
|
947a43e630 | ||
|
|
31679b59f5 | ||
|
|
77b81e1e9a | ||
|
|
6523e45b3f | ||
|
|
49414c6cca | ||
|
|
6efa280e9d | ||
|
|
ff81cfdb82 | ||
|
|
c4b0250321 | ||
|
|
c1a8aa43d6 | ||
|
|
0275adb5b7 | ||
|
|
663143660b | ||
|
|
cb25cf7571 | ||
|
|
33127dde26 | ||
|
|
dc214cee79 | ||
|
|
4395986b0c | ||
|
|
fba7fe9029 | ||
|
|
8ecc8dd548 | ||
|
|
672a559e52 | ||
|
|
0dedf4083e | ||
|
|
bf8b99e365 | ||
|
|
a33ad1c073 | ||
|
|
be90638fac | ||
|
|
3bc6d29f54 | ||
|
|
4e88c55e57 | ||
|
|
b8c94d5870 | ||
|
|
7f83d2f936 | ||
|
|
d42aa42d8a | ||
|
|
86f512fa1a | ||
|
|
16e2b19ea9 | ||
|
|
a1cb07c6d6 | ||
|
|
558d24ad6b | ||
|
|
160218a4ae | ||
|
|
2e361e620e | ||
|
|
fcd0ea9872 | ||
|
|
75826b0e5e | ||
|
|
71ddd03899 | ||
|
|
2d3a0b0184 | ||
|
|
c7fb848a62 | ||
|
|
c643c965b8 | ||
|
|
618be5ce01 | ||
|
|
ac049e6bb9 | ||
|
|
50108afc57 | ||
|
|
1f74a29445 | ||
|
|
08bc5f4b82 | ||
|
|
c208314506 | ||
|
|
763e5ff9e9 | ||
|
|
5ecb42cfef | ||
|
|
102d8e56ff | ||
|
|
ac96c5482f | ||
|
|
71aa60c696 | ||
|
|
d7582da21b | ||
|
|
4bf3773eb3 | ||
|
|
895ea49dc5 | ||
|
|
f355f6dd6a | ||
| 9f87e61f4a | |||
|
|
9a2890c45c | ||
|
|
ad74a45e76 | ||
| fda4860d67 | |||
| 9f8a0f94d2 | |||
| 51d12791ca | |||
| 9fb36f23cd | |||
|
|
1a2fe05808 | ||
|
|
0c5ec895ee | ||
| 7c87e177e9 | |||
|
|
5e6d2a938f | ||
|
|
09070c2cc6 | ||
|
|
5dd30d8802 | ||
|
|
f302cb2448 | ||
| c0a231fd91 | |||
|
|
87f8a6d2c0 | ||
|
|
78a0867215 | ||
| b0da9080c7 | |||
| 8e3feeeaac | |||
| 6f2ecdb364 | |||
| a5e168e55f | |||
|
|
87dc1209b1 | ||
| f86845053e | |||
|
|
c04c5ab048 | ||
|
|
ec3bdb7225 | ||
|
|
4b68809bb9 | ||
|
|
661bc6ac7d | ||
| a9ee943344 | |||
| 826df7d960 | |||
|
|
8dfe124212 | ||
|
|
a3bef857f9 | ||
|
|
ed766d7a02 | ||
| 4295913056 | |||
|
|
e3dfa2c0ea | ||
|
|
6bf8181677 | ||
| d67f3d6fca | |||
|
|
41a0363fbc | ||
| a609e230f2 | |||
|
|
37342bfe4a | ||
| a509354067 | |||
|
|
fb14516674 | ||
| 60c80cc86f | |||
|
|
7b8ea36554 | ||
| 49224375a0 | |||
| 7d7ddd52dc | |||
| cd7043c7f1 | |||
| fb82a038e9 | |||
| 93bcea5893 | |||
| 0ba8578416 | |||
| 86475b8bdf | |||
| f19eaf3b6b | |||
|
|
e537180f1f | ||
|
|
8298ed5c16 | ||
|
|
152a28bd09 | ||
| 7e02cccbe8 | |||
|
|
e60b1594c0 | ||
|
|
87b2b37918 | ||
|
|
a1249b3e00 | ||
| 5000d1f76b | |||
|
|
584625b893 | ||
| 95f4ecc4e0 | |||
| 240e04f9a2 | |||
| 449b8fed64 | |||
|
|
f6d655bb0c | ||
| 4fa1b6e84c | |||
| 168efd78f7 | |||
| e0bd11fa57 | |||
| 3f43299c92 | |||
| 645790f404 | |||
| f11f6a4e62 | |||
|
|
c559253a31 | ||
|
|
a3619ce215 | ||
|
|
398fb7b797 | ||
| b30e6af95d | |||
|
|
4fd79b4708 | ||
| f23da3aea5 | |||
|
|
d951ae5061 | ||
| dfe9916e91 | |||
|
|
036c758547 | ||
| 382a6e49ee | |||
| 93e7449509 | |||
| 58d1c168ff | |||
|
|
889400cdbf | ||
|
|
e06066a327 | ||
| 138f8c4407 | |||
| 33569aff99 | |||
| 3e2f56da7d | |||
|
|
0914ba3509 | ||
|
|
865a979424 | ||
|
|
5dfc3ed259 | ||
| b479364017 | |||
|
|
00d8f852a3 | ||
|
|
2d7f744284 | ||
| 5f1b1a6cd0 | |||
|
|
e966961dbe | ||
| 7ffb0aba5d | |||
|
|
e80a439725 | ||
|
|
8a22825796 | ||
| 1fabd4ce2f | |||
| 759ac5ef90 | |||
|
|
bc971cce92 | ||
|
|
069f6b4983 | ||
| 64cfd5180d | |||
|
|
8a087fb16d | ||
|
|
652c3a28a3 | ||
|
|
141c54ccf3 | ||
|
|
0f8529c7c5 | ||
|
|
dafba36768 | ||
| 4d5e9552e3 | |||
| ddf1d41fd3 | |||
|
|
49e630f7fd | ||
|
|
b7a81d28d1 | ||
| 109c00bc3c | |||
|
|
c9ad055b4c | ||
| 10498c659b | |||
|
|
978bd8e595 | ||
| 259552ac28 | |||
|
|
7f2ded5244 | ||
| e4c370b983 | |||
|
|
7dfc98b6d6 | ||
| cb60c64bce | |||
|
|
091f095893 | ||
| 5b389d12df | |||
|
|
ae88bc8484 | ||
| 529576e082 | |||
|
|
a7ffaa3213 | ||
|
|
e478f1c74d | ||
| 2480b6cecc | |||
| bbe27f963d | |||
|
|
c5da854cef | ||
| 0319707fff | |||
| 4f8d8f1f25 | |||
| 5448ff3f55 | |||
| b6c2d1416e | |||
|
|
152e1d88f4 | ||
| 86e9dc289f | |||
|
|
c4b7198c46 | ||
| f8a12be2ec | |||
|
|
c9ec5126cd | ||
|
|
c66db7c18f | ||
|
|
de47ab76a5 | ||
| c788512d59 | |||
|
|
ae25ccb6f2 | ||
|
|
e27f4cfc68 | ||
| 50e06b4a13 | |||
| 934d6e7a3b | |||
|
|
25654a731e | ||
| 4aecadb3de | |||
| 3b79a82c71 | |||
|
|
04b263dc2d | ||
| 93841d9de7 | |||
| bb294c6d21 | |||
|
|
64962f8863 | ||
| bcb4c05b14 | |||
|
|
d00a09fb58 | ||
|
|
a22ff047f7 | ||
|
|
fef5d7d26a | ||
| fa60fa124c | |||
| 30c1192978 | |||
| 644be2c575 | |||
| 29d1bf9f4e | |||
| 9bdab331b6 | |||
| 8f49ac2d63 | |||
|
|
43b9cd27ed | ||
| 580ac4950b | |||
|
|
d677e83423 | ||
|
|
bff55a6dc7 | ||
|
|
0465658ba7 | ||
|
|
3e484ba726 | ||
|
|
088bb3b435 | ||
|
|
e81bad9d47 | ||
| 3f11a065a3 | |||
|
|
ec6375f31d | ||
|
|
5a8360ed97 | ||
|
|
9e75f82d43 | ||
|
|
7ac26eb0dd | ||
| 00d2f6a61f | |||
|
|
687ca2c22d | ||
| 52281ca2ec | |||
|
|
8850e9fdf1 | ||
| a253993451 | |||
|
|
aeff2bbe73 | ||
| 39616b2435 | |||
|
|
b3d8674499 | ||
| 3ca0fb352d | |||
| f7ea7d57e9 | |||
|
|
a418844f61 | ||
| 96d914d02c | |||
| e6c031829a | |||
| ebfb19c34e | |||
| 4fedec3999 | |||
| 55f78f2eb7 | |||
| ab5ef933d8 | |||
| 3e23109229 | |||
| d18c06ad31 | |||
| 292a6b7e04 | |||
|
|
d7fd5682f3 | ||
| bedab04b22 | |||
| 6d7a32ce11 | |||
| 87ded58aca | |||
|
|
5f30ab73bf | ||
|
|
3f2d2e5fdb | ||
|
|
f55e9a6043 | ||
|
|
7de15db57a | ||
|
|
265f809f8f | ||
|
|
e4d19fc5b4 | ||
|
|
d10eace338 | ||
| 78afc97db2 | |||
|
|
3c0d4d0f4f | ||
|
|
d73d6d7c01 | ||
|
|
af02ee7abf | ||
| 630a596cb6 | |||
|
|
d2729138b6 | ||
| a6fbcc8669 | |||
|
|
d91d632496 | ||
|
|
3a9949a24d | ||
| b045506516 | |||
|
|
3f24de03d1 | ||
|
|
a3ffcb2ea1 | ||
|
|
314a922109 | ||
|
|
2ed4762fab | ||
| 1c6d572559 | |||
|
|
58cc15a7e0 | ||
|
|
3da28531fd | ||
|
|
58f818cebc | ||
|
|
cff7ec922e | ||
|
|
a49f0580da | ||
|
|
10d4f015b2 | ||
|
|
669849b883 | ||
|
|
9ce9470677 | ||
| c3555d59f7 | |||
| 28af553498 | |||
| d42385de3e | |||
| 6104035474 | |||
| dabf043ce6 | |||
| 9b8ef436c8 | |||
| 8cf24a6c96 | |||
| 2797464b45 | |||
| 320cf901ba | |||
| 5bb0fc126e | |||
| 1b8271ed61 | |||
| fab030e9c0 | |||
| be6b65cedb | |||
| cbed39bd64 | |||
| 445622e936 | |||
| 17e28d2891 | |||
| 8325827c41 | |||
| 7c7ed38ead | |||
| 5d2fb32ff8 | |||
|
|
b62a5ba3fb | ||
| 359445ab43 | |||
| 4d1382cfc9 | |||
| b66c7de5fd | |||
| 3d4e5bdde1 | |||
| f37baf2447 | |||
| ad3d8d75c9 | |||
| 4ecfdcef7c | |||
|
|
63ae3e3f6f | ||
| eab2ce50b1 | |||
|
|
523db13be0 | ||
| 6a3f8cffe1 | |||
| 80a0f424cd | |||
| 8e9d85ccd7 | |||
| 85abd589d4 | |||
|
|
bfbd707293 | ||
|
|
526a895775 | ||
| 38e1eba112 | |||
|
|
f9e6cabe6d | ||
| 36bb695c15 | |||
|
|
b449b65244 | ||
| 1a9651914e | |||
|
|
9e5be20983 | ||
| d55bc98bbe | |||
|
|
46d677f5e7 | ||
| ef63b0f9f3 | |||
| 111ae84255 | |||
| d78a3c2550 | |||
| fb89158622 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,6 +2,7 @@
|
||||
!README.md
|
||||
!knowledge/**/*.md
|
||||
!services/comms/knowledge/**/*.md
|
||||
!services/atlasbot/knowledge/**/*.md
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
.pytest_cache
|
||||
|
||||
374
Jenkinsfile
vendored
374
Jenkinsfile
vendored
@ -11,47 +11,9 @@ spec:
|
||||
hardware: rpi5
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-06
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
jenkins/jenkins-jenkins-agent: "true"
|
||||
containers:
|
||||
- name: jnlp
|
||||
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
|
||||
resources:
|
||||
requests:
|
||||
cpu: "25m"
|
||||
memory: "256Mi"
|
||||
- name: python
|
||||
image: registry.bstein.dev/bstein/python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
- name: quality-tools
|
||||
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
|
||||
image: python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -61,21 +23,6 @@ spec:
|
||||
environment {
|
||||
PIP_DISABLE_PIP_VERSION_CHECK = '1'
|
||||
PYTHONUNBUFFERED = '1'
|
||||
SUITE_NAME = 'titan_iac'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
|
||||
SONARQUBE_PROJECT_KEY = 'titan_iac'
|
||||
SONARQUBE_TOKEN = credentials('sonarqube-token')
|
||||
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
|
||||
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_ENFORCE = '1'
|
||||
QUALITY_GATE_IRONBANK_REQUIRED = '0'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
@ -85,295 +32,12 @@ spec:
|
||||
}
|
||||
stage('Install deps') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
if ! command -v git >/dev/null 2>&1; then
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends git ca-certificates
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
pip install --no-cache-dir -r ci/requirements.txt
|
||||
'''
|
||||
sh 'pip install --no-cache-dir -r ci/requirements.txt'
|
||||
}
|
||||
}
|
||||
stage('Prepare local quality evidence') {
|
||||
stage('Glue tests') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
set +e
|
||||
python3 -m testing.quality_gate --profile local --build-dir build
|
||||
local_quality_rc=$?
|
||||
set -e
|
||||
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
args=(
|
||||
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
|
||||
"-Dsonar.login=${SONARQUBE_TOKEN}"
|
||||
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.sources=."
|
||||
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
|
||||
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
|
||||
)
|
||||
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
|
||||
set +e
|
||||
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
set -e
|
||||
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
|
||||
'''
|
||||
}
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
|
||||
payload = {
|
||||
"status": "ERROR",
|
||||
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
|
||||
}
|
||||
if host and project_key:
|
||||
task_file = Path('.scannerwork/report-task.txt')
|
||||
task_id = ''
|
||||
if task_file.exists():
|
||||
for line in task_file.read_text(encoding='utf-8').splitlines():
|
||||
key, _, value = line.partition('=')
|
||||
if key == 'ceTaskId':
|
||||
task_id = value.strip()
|
||||
break
|
||||
if task_id:
|
||||
ce_query = urllib.parse.urlencode({"id": task_id})
|
||||
deadline = time.monotonic() + 180
|
||||
while time.monotonic() < deadline:
|
||||
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
ce_request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(ce_request, timeout=12) as response:
|
||||
ce_payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception:
|
||||
time.sleep(3)
|
||||
continue
|
||||
status = str(ce_payload.get("task", {}).get("status", "")).upper()
|
||||
if status in {"SUCCESS", "FAILED", "CANCELED"}:
|
||||
break
|
||||
time.sleep(3)
|
||||
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(
|
||||
f"{host}/api/qualitygates/project_status?{query}",
|
||||
method="GET",
|
||||
)
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Collect IronBank evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
set +e
|
||||
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
|
||||
trivy_rc=$?
|
||||
set -e
|
||||
if [ ! -s build/trivy-fs.json ]; then
|
||||
cat > build/ironbank-compliance.json <<EOF
|
||||
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
|
||||
EOF
|
||||
exit 0
|
||||
fi
|
||||
'''
|
||||
}
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
if [ -s build/trivy-fs.json ]; then
|
||||
python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
|
||||
exit 0
|
||||
fi
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
|
||||
if report_path.exists():
|
||||
raise SystemExit(0)
|
||||
|
||||
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
|
||||
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
|
||||
payload = {
|
||||
"status": status or "unknown",
|
||||
"compliant": compliant in {"1", "true", "yes", "on"} if compliant else None,
|
||||
}
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
if "status" not in payload:
|
||||
payload["status"] = "unknown"
|
||||
payload["note"] = (
|
||||
"Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
|
||||
"or write build/ironbank-compliance.json in image-building repos."
|
||||
)
|
||||
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Run quality gate') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
set +e
|
||||
python3 -m testing.quality_gate --profile jenkins --build-dir build
|
||||
quality_gate_rc=$?
|
||||
set -e
|
||||
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Publish test metrics') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
export JUNIT_GLOB='build/junit-*.xml'
|
||||
export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
|
||||
export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
|
||||
python3 ci/scripts/publish_test_metrics.py
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Enforce quality gate') {
|
||||
steps {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
|
||||
fail=0
|
||||
if [ "${gate_rc}" -ne 0 ]; then
|
||||
echo "quality gate failed with rc=${gate_rc}" >&2
|
||||
fail=1
|
||||
fi
|
||||
|
||||
enabled() {
|
||||
case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
|
||||
1|true|yes|on) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
|
||||
sonar_status="$(python3 - <<'PY'
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
path = Path("build/sonarqube-quality-gate.json")
|
||||
if not path.exists():
|
||||
print("missing")
|
||||
raise SystemExit(0)
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception: # noqa: BLE001
|
||||
print("error")
|
||||
raise SystemExit(0)
|
||||
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
|
||||
print(status or "missing")
|
||||
PY
|
||||
)"
|
||||
case "${sonar_status}" in
|
||||
ok|pass|passed|success) ;;
|
||||
*)
|
||||
echo "sonarqube gate failed: ${sonar_status}" >&2
|
||||
fail=1
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
|
||||
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
|
||||
ironbank_required=1
|
||||
fi
|
||||
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
|
||||
supply_status="$(python3 - <<'PY'
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
path = Path("build/ironbank-compliance.json")
|
||||
if not path.exists():
|
||||
print("missing")
|
||||
raise SystemExit(0)
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception: # noqa: BLE001
|
||||
print("error")
|
||||
raise SystemExit(0)
|
||||
compliant = payload.get("compliant")
|
||||
if compliant is True:
|
||||
print("ok")
|
||||
elif compliant is False:
|
||||
print("failed")
|
||||
else:
|
||||
status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
|
||||
print(status or "missing")
|
||||
PY
|
||||
)"
|
||||
case "${supply_status}" in
|
||||
ok|pass|passed|success|compliant) ;;
|
||||
not_applicable|na|n/a)
|
||||
if enabled "${ironbank_required}"; then
|
||||
echo "supply chain gate required but status=${supply_status}" >&2
|
||||
fail=1
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
if enabled "${ironbank_required}"; then
|
||||
echo "supply chain gate failed: ${supply_status}" >&2
|
||||
fail=1
|
||||
else
|
||||
echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
fi
|
||||
|
||||
exit "${fail}"
|
||||
'''
|
||||
sh 'pytest -q ci/tests/glue'
|
||||
}
|
||||
}
|
||||
stage('Resolve Flux branch') {
|
||||
@ -381,7 +45,7 @@ PY
|
||||
script {
|
||||
env.FLUX_BRANCH = sh(
|
||||
returnStdout: true,
|
||||
script: "grep -m1 '^\\s*branch:' clusters/atlas/flux-system/gotk-sync.yaml | sed 's/^\\s*branch:\\s*//'"
|
||||
script: "awk '/branch:/{print $2; exit}' clusters/atlas/flux-system/gotk-sync.yaml"
|
||||
).trim()
|
||||
if (!env.FLUX_BRANCH) {
|
||||
error('Flux branch not found in gotk-sync.yaml')
|
||||
@ -400,20 +64,6 @@ PY
|
||||
steps {
|
||||
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
if ! command -v git >/dev/null 2>&1; then
|
||||
if command -v apk >/dev/null 2>&1; then
|
||||
apk add --no-cache git >/dev/null
|
||||
elif command -v apt-get >/dev/null 2>&1; then
|
||||
apt-get update >/dev/null
|
||||
apt-get install -y git >/dev/null
|
||||
fi
|
||||
fi
|
||||
cd "${WORKSPACE:-$PWD}"
|
||||
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
|
||||
echo "workspace is not a git checkout; skipping promote"
|
||||
exit 0
|
||||
fi
|
||||
set +x
|
||||
git config user.email "jenkins@bstein.dev"
|
||||
git config user.name "jenkins"
|
||||
@ -424,18 +74,4 @@ PY
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
|
||||
always {
|
||||
script {
|
||||
if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
|
||||
try {
|
||||
junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
|
||||
} catch (Throwable err) {
|
||||
echo "junit step unavailable: ${err.class.simpleName}"
|
||||
}
|
||||
}
|
||||
}
|
||||
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
28
README.md
28
README.md
@ -1,29 +1,3 @@
|
||||
# titan-iac
|
||||
|
||||
Flux-managed Kubernetes desired-state config for `bstein.dev`.
|
||||
|
||||
Canonical source URL:
|
||||
- `ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
|
||||
|
||||
## Scope
|
||||
|
||||
This repo contains cluster configuration consumed by Flux:
|
||||
- platform/infrastructure manifests
|
||||
- service manifests and kustomizations
|
||||
- operational scripts for render/reconcile workflows
|
||||
|
||||
This repo is **not** the Ananke application source repo.
|
||||
Ananke lives in `bstein/ananke` and orchestrates host-side shutdown/startup behavior around this desired state.
|
||||
|
||||
## Validation workflow
|
||||
|
||||
```bash
|
||||
kustomize build services/<app>
|
||||
kubectl apply --server-side --dry-run=client -k services/<app>
|
||||
flux reconcile kustomization <name> --namespace flux-system --with-source
|
||||
```
|
||||
|
||||
## Apply model
|
||||
|
||||
Use Git + Flux as the source of truth.
|
||||
Avoid manual in-cluster edits for durable changes.
|
||||
Flux-managed Kubernetes cluster for bstein.dev services.
|
||||
|
||||
@ -10,47 +10,9 @@ spec:
|
||||
hardware: rpi5
|
||||
kubernetes.io/arch: arm64
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-06
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
topologySpreadConstraints:
|
||||
- maxSkew: 1
|
||||
topologyKey: kubernetes.io/hostname
|
||||
whenUnsatisfiable: ScheduleAnyway
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
jenkins/jenkins-jenkins-agent: "true"
|
||||
containers:
|
||||
- name: jnlp
|
||||
image: jenkins/inbound-agent:3355.v388858a_47b_33-2-jdk21
|
||||
resources:
|
||||
requests:
|
||||
cpu: "25m"
|
||||
memory: "256Mi"
|
||||
- name: python
|
||||
image: registry.bstein.dev/bstein/python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
- name: quality-tools
|
||||
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
|
||||
image: python:3.12-slim
|
||||
command:
|
||||
- cat
|
||||
tty: true
|
||||
@ -60,21 +22,6 @@ spec:
|
||||
environment {
|
||||
PIP_DISABLE_PIP_VERSION_CHECK = '1'
|
||||
PYTHONUNBUFFERED = '1'
|
||||
SUITE_NAME = 'titan_iac'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
|
||||
SONARQUBE_PROJECT_KEY = 'titan_iac'
|
||||
SONARQUBE_TOKEN = credentials('sonarqube-token')
|
||||
VM_URL = 'http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428'
|
||||
QUALITY_GATE_SONARQUBE_ENFORCE = '1'
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_ENFORCE = '1'
|
||||
QUALITY_GATE_IRONBANK_REQUIRED = '0'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
@ -84,295 +31,12 @@ spec:
|
||||
}
|
||||
stage('Install deps') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
if ! command -v git >/dev/null 2>&1; then
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends git ca-certificates
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
fi
|
||||
pip install --no-cache-dir -r ci/requirements.txt
|
||||
'''
|
||||
sh 'pip install --no-cache-dir -r ci/requirements.txt'
|
||||
}
|
||||
}
|
||||
stage('Prepare local quality evidence') {
|
||||
stage('Glue tests') {
|
||||
steps {
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
set +e
|
||||
python3 -m testing.quality_gate --profile local --build-dir build
|
||||
local_quality_rc=$?
|
||||
set -e
|
||||
printf '%s\n' "${local_quality_rc}" > build/local-quality-gate.rc
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
args=(
|
||||
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
|
||||
"-Dsonar.login=${SONARQUBE_TOKEN}"
|
||||
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.sources=."
|
||||
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**,services/monitoring/dashboards/**,services/monitoring/grafana-dashboard-*.yaml"
|
||||
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
|
||||
)
|
||||
[ -f build/coverage-unit.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage-unit.xml")
|
||||
set +e
|
||||
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
set -e
|
||||
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
|
||||
'''
|
||||
}
|
||||
sh '''
|
||||
set -eu
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
|
||||
payload = {
|
||||
"status": "ERROR",
|
||||
"note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY",
|
||||
}
|
||||
if host and project_key:
|
||||
task_file = Path('.scannerwork/report-task.txt')
|
||||
task_id = ''
|
||||
if task_file.exists():
|
||||
for line in task_file.read_text(encoding='utf-8').splitlines():
|
||||
key, _, value = line.partition('=')
|
||||
if key == 'ceTaskId':
|
||||
task_id = value.strip()
|
||||
break
|
||||
if task_id:
|
||||
ce_query = urllib.parse.urlencode({"id": task_id})
|
||||
deadline = time.monotonic() + 180
|
||||
while time.monotonic() < deadline:
|
||||
ce_request = urllib.request.Request(f"{host}/api/ce/task?{ce_query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
ce_request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(ce_request, timeout=12) as response:
|
||||
ce_payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception:
|
||||
time.sleep(3)
|
||||
continue
|
||||
status = str(ce_payload.get("task", {}).get("status", "")).upper()
|
||||
if status in {"SUCCESS", "FAILED", "CANCELED"}:
|
||||
break
|
||||
time.sleep(3)
|
||||
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(
|
||||
f"{host}/api/qualitygates/project_status?{query}",
|
||||
method="GET",
|
||||
)
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
stage('Collect IronBank evidence') {
  steps {
    // Step 1: run the Trivy filesystem scan inside the quality-tools container.
    container('quality-tools') {
      sh '''#!/usr/bin/env bash
set -euo pipefail
mkdir -p build
# Keep the scanner exit status without letting -e abort the step.
trivy_rc=0
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --skip-files clusters/atlas/flux-system/gotk-components.yaml --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL . || trivy_rc=$?
# No (or empty) JSON evidence: record a failed compliance report instead.
if [ ! -s build/trivy-fs.json ]; then
  printf '{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":%s}\\n' "${trivy_rc}" > build/ironbank-compliance.json
  exit 0
fi
'''
    }
    // Step 2: turn the Trivy evidence into the compliance report, or fall back
    // to a placeholder report built from environment variables.
    sh '''
set -eu
mkdir -p build
if [ -s build/trivy-fs.json ]; then
  python3 ci/scripts/supply_chain_report.py --trivy-json build/trivy-fs.json --waivers ci/titan-iac-trivy-waivers.json --output build/ironbank-compliance.json
  exit 0
fi
python3 - <<'PY'
import json
import os
from pathlib import Path

report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
# An existing report (for example the failure stub from the scan step) wins.
if report_path.exists():
    raise SystemExit(0)

payload = {
    "status": os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip() or "unknown",
    "note": (
        "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT "
        "or write build/ironbank-compliance.json in image-building repos."
    ),
}
# "compliant" is only emitted when the variable carries a value.
flag = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
if flag:
    payload["compliant"] = flag in {"1", "true", "yes", "on"}

report_path.parent.mkdir(parents=True, exist_ok=True)
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
PY
'''
  }
}
|
||||
stage('Run quality gate') {
  steps {
    // Run the gate but never fail here: the exit status is persisted so later
    // stages can publish metrics before the result is enforced.
    sh '''
set -eu
mkdir -p build
quality_gate_rc=0
python3 -m testing.quality_gate --profile jenkins --build-dir build || quality_gate_rc=$?
printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
'''
  }
}
|
||||
stage('Publish test metrics') {
  steps {
    // Point the publisher at the JUnit files, the saved gate exit code and the
    // gate summary, then run it.
    sh '''
set -eu
JUNIT_GLOB='build/junit-*.xml' \\
QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc' \\
QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json' \\
python3 ci/scripts/publish_test_metrics.py
'''
  }
}
|
||||
stage('Enforce quality gate') {
  steps {
    // Fail the build when the saved quality-gate exit code, the SonarQube
    // report, or (when required) the supply-chain report is not passing.
    sh '''
# No shebang here, so this runs under the agent default shell. Stick to POSIX
# options: "pipefail" is rejected by some /bin/sh implementations.
set -eu
gate_rc="$(cat build/quality-gate.rc 2>/dev/null || echo 1)"
fail=0
# Compare as a string. With an integer test, an empty or non-numeric rc makes
# the test itself error out, "if" treats that as false, and the gate passes.
if [ "${gate_rc}" != "0" ]; then
  echo "quality gate failed with rc=${gate_rc}" >&2
  fail=1
fi

# enabled VALUE: succeed when VALUE is 1/true/yes/on (case-insensitive).
enabled() {
  case "$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')" in
    1|true|yes|on) return 0 ;;
    *) return 1 ;;
  esac
}

if enabled "${QUALITY_GATE_SONARQUBE_ENFORCE:-1}"; then
  sonar_status="$(python3 - <<'PY'
import json
from pathlib import Path

path = Path("build/sonarqube-quality-gate.json")
if not path.exists():
    print("missing")
    raise SystemExit(0)
try:
    payload = json.loads(path.read_text(encoding="utf-8"))
except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
status = (payload.get("status") or payload.get("projectStatus", {}).get("status") or payload.get("qualityGate", {}).get("status") or "").strip().lower()
print(status or "missing")
PY
)"
  case "${sonar_status}" in
    ok|pass|passed|success) ;;
    *)
      echo "sonarqube gate failed: ${sonar_status}" >&2
      fail=1
      ;;
  esac
fi

# Publishing images always makes the supply-chain gate mandatory.
ironbank_required="${QUALITY_GATE_IRONBANK_REQUIRED:-0}"
if [ "${PUBLISH_IMAGES:-false}" = "true" ]; then
  ironbank_required=1
fi
if enabled "${QUALITY_GATE_IRONBANK_ENFORCE:-1}"; then
  supply_status="$(python3 - <<'PY'
import json
from pathlib import Path

path = Path("build/ironbank-compliance.json")
if not path.exists():
    print("missing")
    raise SystemExit(0)
try:
    payload = json.loads(path.read_text(encoding="utf-8"))
except Exception:  # noqa: BLE001
    print("error")
    raise SystemExit(0)
# An explicit boolean "compliant" takes precedence over any status text.
compliant = payload.get("compliant")
if compliant is True:
    print("ok")
elif compliant is False:
    print("failed")
else:
    status = str(payload.get("status") or payload.get("result") or payload.get("compliance") or "").strip().lower()
    print(status or "missing")
PY
)"
  case "${supply_status}" in
    ok|pass|passed|success|compliant) ;;
    not_applicable|na|n/a)
      if enabled "${ironbank_required}"; then
        echo "supply chain gate required but status=${supply_status}" >&2
        fail=1
      fi
      ;;
    *)
      if enabled "${ironbank_required}"; then
        echo "supply chain gate failed: ${supply_status}" >&2
        fail=1
      else
        echo "supply chain gate not passing (${supply_status}) but not required for this run" >&2
      fi
      ;;
  esac
fi

exit "${fail}"
'''
    sh 'pytest -q ci/tests/glue'
  }
}
|
||||
stage('Resolve Flux branch') {
|
||||
@ -399,20 +63,6 @@ PY
|
||||
steps {
|
||||
withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
if ! command -v git >/dev/null 2>&1; then
|
||||
if command -v apk >/dev/null 2>&1; then
|
||||
apk add --no-cache git >/dev/null
|
||||
elif command -v apt-get >/dev/null 2>&1; then
|
||||
apt-get update >/dev/null
|
||||
apt-get install -y git >/dev/null
|
||||
fi
|
||||
fi
|
||||
cd "${WORKSPACE:-$PWD}"
|
||||
if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
|
||||
echo "workspace is not a git checkout; skipping promote"
|
||||
exit 0
|
||||
fi
|
||||
set +x
|
||||
git config user.email "jenkins@bstein.dev"
|
||||
git config user.name "jenkins"
|
||||
@ -423,18 +73,4 @@ PY
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
  always {
    script {
      // Only publish JUnit results when one of the expected reports exists.
      def hasJunitReport = fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')
      if (hasJunitReport) {
        try {
          junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
        } catch (Throwable err) {
          // Best effort: log the failure type and carry on to archiving.
          echo "junit step unavailable: ${err.class.simpleName}"
        }
      }
    }
    archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
  }
}
|
||||
}
|
||||
|
||||
@ -1,7 +1,4 @@
|
||||
pytest==8.3.4
|
||||
pytest-cov==6.0.0
|
||||
coverage==7.6.10
|
||||
kubernetes==30.1.0
|
||||
PyYAML==6.0.2
|
||||
requests==2.32.3
|
||||
ruff==0.8.4
|
||||
|
||||
@ -1,352 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Publish titan-iac quality-gate results to Pushgateway."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
from glob import glob
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
|
||||
|
||||
from ci.scripts import publish_test_metrics_quality as _quality_helpers
|
||||
|
||||
CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
|
||||
_build_check_statuses = _quality_helpers._build_check_statuses
|
||||
_combine_statuses = _quality_helpers._combine_statuses
|
||||
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
|
||||
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
|
||||
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
|
||||
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
|
||||
_load_optional_json = _quality_helpers._load_optional_json
|
||||
_normalize_result_status = _quality_helpers._normalize_result_status
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
|
||||
"""Escape a Prometheus label value without changing its content."""
|
||||
return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
|
||||
|
||||
|
||||
def _label_str(labels: dict[str, str]) -> str:
    """Render a mapping as ``{k="v",...}``, skipping labels with empty values.

    Returns an empty string when no label has a value.
    """
    rendered = []
    for name, raw in labels.items():
        if raw:
            rendered.append(f'{name}="{_escape_label(raw)}"')
    if not rendered:
        return ""
    return "{" + ",".join(rendered) + "}"
|
||||
|
||||
|
||||
def _read_text(url: str) -> str:
    """GET ``url`` (10 second timeout) and return the body decoded as UTF-8."""
    with urllib.request.urlopen(url, timeout=10) as response:
        body = response.read()
    return body.decode("utf-8")
|
||||
|
||||
|
||||
def _post_text(url: str, payload: str) -> None:
    """Send ``payload`` as a ``text/plain`` PUT request.

    Raises ``RuntimeError`` when the response status is 400 or above.
    """
    body = payload.encode("utf-8")
    request = urllib.request.Request(
        url,
        data=body,
        method="PUT",
        headers={"Content-Type": "text/plain"},
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        code = response.status
        if code >= 400:
            raise RuntimeError(f"push failed with status={code}")
|
||||
|
||||
|
||||
def _parse_junit(path: str) -> dict[str, int]:
|
||||
"""Parse a JUnit XML file into aggregate test counters."""
|
||||
if not os.path.exists(path):
|
||||
return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
|
||||
|
||||
tree = ET.parse(path)
|
||||
root = tree.getroot()
|
||||
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
|
||||
|
||||
suites: list[ET.Element]
|
||||
if root.tag == "testsuite":
|
||||
suites = [root]
|
||||
elif root.tag == "testsuites":
|
||||
suites = [elem for elem in root if elem.tag == "testsuite"]
|
||||
else:
|
||||
suites = []
|
||||
|
||||
for suite in suites:
|
||||
for key in totals:
|
||||
raw_value = suite.attrib.get(key, "0")
|
||||
try:
|
||||
totals[key] += int(float(raw_value))
|
||||
except ValueError:
|
||||
totals[key] += 0
|
||||
return totals
|
||||
|
||||
|
||||
def _collect_junit_totals(pattern: str) -> dict[str, int]:
    """Add up the JUnit counters of every file matching the glob ``pattern``."""
    grand = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
    for report in sorted(glob(pattern)):
        for name, amount in _parse_junit(report).items():
            if name in grand:
                grand[name] += amount
    return grand
|
||||
|
||||
|
||||
def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
|
||||
"""Collect individual JUnit test-case statuses for flaky-test trend panels."""
|
||||
cases: list[tuple[str, str]] = []
|
||||
for path in sorted(glob(pattern)):
|
||||
if not os.path.exists(path):
|
||||
continue
|
||||
root = ET.parse(path).getroot()
|
||||
suites: list[ET.Element]
|
||||
if root.tag == "testsuite":
|
||||
suites = [root]
|
||||
elif root.tag == "testsuites":
|
||||
suites = [elem for elem in root if elem.tag == "testsuite"]
|
||||
else:
|
||||
suites = []
|
||||
for suite in suites:
|
||||
for test_case in suite.findall("testcase"):
|
||||
case_name = test_case.attrib.get("name", "").strip()
|
||||
class_name = test_case.attrib.get("classname", "").strip()
|
||||
if not case_name:
|
||||
continue
|
||||
full_name = f"{class_name}.{case_name}" if class_name else case_name
|
||||
status = "passed"
|
||||
if test_case.find("failure") is not None or test_case.find("error") is not None:
|
||||
status = "failed"
|
||||
elif test_case.find("skipped") is not None:
|
||||
status = "skipped"
|
||||
cases.append((full_name, status))
|
||||
return cases
|
||||
|
||||
|
||||
def _read_exit_code(path: str) -> int:
|
||||
"""Read the quality-gate exit code, defaulting to failure if missing."""
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
return int(handle.read().strip())
|
||||
except (FileNotFoundError, ValueError):
|
||||
return 1
|
||||
|
||||
|
||||
def _load_summary(path: str) -> dict:
|
||||
"""Load the JSON quality-gate summary, returning an empty mapping on error."""
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as handle:
|
||||
return json.load(handle)
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
return {}
|
||||
|
||||
|
||||
def _summary_float(summary: dict, key: str) -> float:
|
||||
"""Extract a float-like value from the summary, defaulting to 0.0."""
|
||||
value = summary.get(key)
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
return 0.0
|
||||
|
||||
|
||||
def _summary_int(summary: dict, key: str) -> int:
|
||||
"""Extract an int-like value from the summary, defaulting to 0."""
|
||||
value = summary.get(key)
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, float):
|
||||
return int(value)
|
||||
return 0
|
||||
|
||||
|
||||
def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
    """Return the current value of a labeled sample from the Pushgateway.

    Fetches ``<pushgateway_url>/metrics`` and returns the value of the first
    line for ``metric`` that contains every ``key="value"`` pair in ``labels``.
    Returns 0.0 when no line matches or the value is not a number.
    """
    text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
    prefix = metric + "{"
    for line in text.splitlines():
        if not line.startswith(prefix):
            continue
        if any(f'{key}="{value}"' not in line for key, value in labels.items()):
            continue
        # The sample value follows the closing brace of the label set. Reading
        # it from there, rather than from the second whitespace-separated
        # field, stays correct when a label value contains spaces.
        fields = line.rpartition("}")[2].split()
        if not fields:
            continue
        try:
            return float(fields[0])
        except ValueError:
            return 0.0
    return 0.0
|
||||
|
||||
|
||||
def _build_payload(
    suite: str,
    status: str,
    tests: dict[str, int],
    test_cases: list[tuple[str, str]],
    ok_count: int,
    failed_count: int,
    branch: str,
    build_number: str,
    jenkins_job: str,
    summary: dict | None = None,
    workspace_line_coverage_percent: float = 0.0,
    source_lines_over_500: int = 0,
    check_statuses: dict[str, str] | None = None,
) -> str:
    """Build the Pushgateway text payload for one suite run.

    Emits run counters, aggregate test gauges, the run status, build info,
    coverage and over-limit-file gauges, one gauge per canonical check (when
    ``check_statuses`` is given) and one gauge per test case. ``summary`` is
    accepted for interface compatibility and is not read here.

    Returns the payload as newline-terminated text.
    """
    passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
    base_labels = {
        "suite": suite,
        "branch": branch or "unknown",
        "build_number": build_number or "unknown",
        "jenkins_job": jenkins_job or suite,
    }
    build_labels = _label_str(base_labels)
    # Escape the suite once so hand-written label sets stay well-formed even
    # if the name contains quotes, backslashes or newlines.
    suite_sel = f'suite="{_escape_label(suite)}"'
    lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{{suite_sel},status="ok"}} {ok_count}',
        f'platform_quality_gate_runs_total{{{suite_sel},status="failed"}} {failed_count}',
        "# TYPE titan_iac_quality_gate_tests_total gauge",
        f'titan_iac_quality_gate_tests_total{{{suite_sel},result="passed"}} {passed}',
        f'titan_iac_quality_gate_tests_total{{{suite_sel},result="failed"}} {tests["failures"]}',
        f'titan_iac_quality_gate_tests_total{{{suite_sel},result="error"}} {tests["errors"]}',
        f'titan_iac_quality_gate_tests_total{{{suite_sel},result="skipped"}} {tests["skipped"]}',
        "# TYPE titan_iac_quality_gate_run_status gauge",
        f'titan_iac_quality_gate_run_status{{{suite_sel},status="ok"}} {1 if status == "ok" else 0}',
        f'titan_iac_quality_gate_run_status{{{suite_sel},status="failed"}} {1 if status == "failed" else 0}',
        "# TYPE platform_quality_gate_build_info gauge",
        f"platform_quality_gate_build_info{build_labels} 1",
        "# TYPE titan_iac_quality_gate_build_info gauge",
        f"titan_iac_quality_gate_build_info{build_labels} 1",
        "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
        f"platform_quality_gate_workspace_line_coverage_percent{{{suite_sel}}} {workspace_line_coverage_percent:.3f}",
        "# TYPE platform_quality_gate_source_lines_over_500_total gauge",
        f"platform_quality_gate_source_lines_over_500_total{{{suite_sel}}} {source_lines_over_500}",
    ]
    if check_statuses:
        lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
        for check_name in CANONICAL_CHECKS:
            # Checks missing from the mapping are reported as not applicable.
            check_status = check_statuses.get(check_name, "not_applicable")
            lines.append(
                f'titan_iac_quality_gate_checks_total{{{suite_sel},check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
            )
    lines.append("# TYPE platform_quality_gate_test_case_result gauge")
    if test_cases:
        for test_name, test_status in test_cases:
            labels = {**base_labels, "test": test_name, "status": test_status}
            lines.append(f"platform_quality_gate_test_case_result{_label_str(labels)} 1")
    else:
        # Emit a placeholder sample when no individual cases were collected.
        labels = {**base_labels, "test": "__no_test_cases__", "status": "skipped"}
        lines.append(f"platform_quality_gate_test_case_result{_label_str(labels)} 1")
    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def main() -> int:
    """Collect gate results, push them to the Pushgateway and print a summary.

    Configuration comes from environment variables with defaults. Returns 0
    after a successful push.
    """
    env = os.getenv
    suite = env("SUITE_NAME", "titan_iac")
    pushgateway_url = env("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
    job_name = env("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
    junit_glob = env("JUNIT_GLOB", env("JUNIT_PATH", "build/junit-*.xml"))
    exit_code_path = env("QUALITY_GATE_EXIT_CODE_PATH", env("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
    summary_path = env("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
    build_number = env("BUILD_NUMBER", "")
    jenkins_job = env("JOB_NAME", "titan-iac")

    branch = env("BRANCH_NAME") or env("GIT_BRANCH") or "unknown"
    remote_prefix = "origin/"
    if branch.startswith(remote_prefix):
        branch = branch[len(remote_prefix):]

    tests = _collect_junit_totals(junit_glob)
    test_cases = _collect_junit_cases(junit_glob)
    status = "ok" if _read_exit_code(exit_code_path) == 0 else "failed"
    summary = _load_summary(summary_path)

    # Prefer values recorded in the summary; infer them when absent or zero.
    coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
    if coverage_percent <= 0:
        coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
    oversized_files = _summary_int(summary, "source_lines_over_500")
    if oversized_files <= 0:
        oversized_files = _infer_source_lines_over_500(summary)

    sonarqube_report = _load_optional_json(env("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
    supply_chain_report = _load_optional_json(env("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
    supply_chain_required = env("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in {"1", "true", "yes", "on"}
    check_statuses = _build_check_statuses(
        summary=summary,
        tests=tests,
        workspace_line_coverage_percent=coverage_percent,
        source_lines_over_500=oversized_files,
        sonarqube_report=sonarqube_report,
        supply_chain_report=supply_chain_report,
        supply_chain_required=supply_chain_required,
    )

    # Read the counters already stored in the gateway and bump the one that
    # matches this run.
    run_counts = {}
    for outcome in ("ok", "failed"):
        run_counts[outcome] = int(
            _fetch_existing_counter(
                pushgateway_url,
                "platform_quality_gate_runs_total",
                {"job": job_name, "suite": suite, "status": outcome},
            )
        )
    run_counts[status] += 1
    ok_count = run_counts["ok"]
    failed_count = run_counts["failed"]

    payload = _build_payload(
        suite=suite,
        status=status,
        tests=tests,
        test_cases=test_cases,
        ok_count=ok_count,
        failed_count=failed_count,
        branch=branch,
        build_number=build_number,
        jenkins_job=jenkins_job,
        summary=summary,
        workspace_line_coverage_percent=coverage_percent,
        source_lines_over_500=oversized_files,
        check_statuses=check_statuses,
    )
    _post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload)

    run_summary = {
        "suite": suite,
        "status": status,
        "tests_total": tests["tests"],
        "tests_failed": tests["failures"],
        "tests_error": tests["errors"],
        "tests_skipped": tests["skipped"],
        "ok_count": ok_count,
        "failed_count": failed_count,
        "checks_recorded": len(check_statuses),
        "workspace_line_coverage_percent": coverage_percent,
        "source_lines_over_500": oversized_files,
    }
    print(json.dumps(run_summary, sort_keys=True))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main())
|
||||
@ -1,200 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Quality/status helpers for publish_test_metrics."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
|
||||
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
|
||||
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}
|
||||
|
||||
CANONICAL_CHECKS = [
|
||||
"tests",
|
||||
"coverage",
|
||||
"loc",
|
||||
"docs_naming",
|
||||
"gate_glue",
|
||||
"sonarqube",
|
||||
"supply_chain",
|
||||
]
|
||||
|
||||
|
||||
def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
|
||||
"""Infer workspace line coverage from quality summary coverage XML metadata."""
|
||||
results = summary.get("results", []) if isinstance(summary, dict) else []
|
||||
coverage_xml = default_xml
|
||||
for result in results:
|
||||
if not isinstance(result, dict):
|
||||
continue
|
||||
if str(result.get("name") or "").strip().lower() != "coverage":
|
||||
continue
|
||||
candidate = str(result.get("coverage_xml") or "").strip()
|
||||
if candidate:
|
||||
coverage_xml = candidate
|
||||
break
|
||||
xml_path = Path(coverage_xml)
|
||||
if not xml_path.exists():
|
||||
return 0.0
|
||||
try:
|
||||
root = ET.parse(xml_path).getroot()
|
||||
line_rate = root.attrib.get("line-rate")
|
||||
if line_rate is None:
|
||||
return 0.0
|
||||
return float(line_rate) * 100.0
|
||||
except (ET.ParseError, OSError, ValueError):
|
||||
return 0.0
|
||||
|
||||
|
||||
def _infer_source_lines_over_500(summary: dict) -> int:
|
||||
"""Infer over-limit source file count from hygiene issue payloads."""
|
||||
results = summary.get("results", []) if isinstance(summary, dict) else []
|
||||
for result in results:
|
||||
if not isinstance(result, dict):
|
||||
continue
|
||||
if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
|
||||
continue
|
||||
issues = result.get("issues")
|
||||
if not isinstance(issues, list):
|
||||
continue
|
||||
return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
|
||||
return 0
|
||||
|
||||
|
||||
def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Map status text onto ``ok``, ``not_applicable`` or ``failed``.

    Matching is case-insensitive and ignores surrounding whitespace. Empty or
    unrecognized values yield ``default``. Non-string values, which can arrive
    from loosely typed JSON, are converted with ``str()`` instead of raising.
    """
    if not value:
        return default
    normalized = str(value).strip().lower()
    if normalized in SUCCESS_STATUSES:
        return "ok"
    if normalized in NOT_APPLICABLE_STATUSES:
        return "not_applicable"
    if normalized in FAILED_STATUSES:
        return "failed"
    return default
|
||||
|
||||
|
||||
def _load_optional_json(path: str | None) -> dict:
|
||||
"""Load an optional JSON report file, returning an empty object when absent."""
|
||||
if not path:
|
||||
return {}
|
||||
candidate = Path(path)
|
||||
if not candidate.exists():
|
||||
return {}
|
||||
try:
|
||||
return json.loads(candidate.read_text(encoding="utf-8"))
|
||||
except json.JSONDecodeError:
|
||||
return {}
|
||||
|
||||
|
||||
def _combine_statuses(statuses: list[str]) -> str:
|
||||
"""Roll up many check statuses into one canonical result."""
|
||||
if not statuses:
|
||||
return "not_applicable"
|
||||
if any(status == "failed" for status in statuses):
|
||||
return "failed"
|
||||
if all(status == "not_applicable" for status in statuses):
|
||||
return "not_applicable"
|
||||
if all(status in {"ok", "not_applicable"} for status in statuses):
|
||||
return "ok"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _infer_sonarqube_status(report: dict) -> str:
    """Derive the canonical SonarQube check status from a report payload.

    An empty report is ``not_applicable``. Otherwise the first truthy status
    among ``projectStatus.status``, ``qualityGate.status`` and ``status`` is
    normalized, with ``failed`` as the fallback.
    """
    if not report:
        return "not_applicable"
    raw = report.get("projectStatus", {}).get("status")
    if not raw:
        raw = report.get("qualityGate", {}).get("status")
    if not raw:
        raw = report.get("status")
    text = None if raw is None else str(raw)
    return _normalize_result_status(text, default="failed")
|
||||
|
||||
|
||||
def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Derive the canonical supply-chain status from a compliance report.

    A boolean ``compliant`` field decides directly. Otherwise ``status`` is
    normalized. A missing report or status, or a ``not_applicable`` result,
    counts as ``failed`` when the check is ``required``.
    """
    when_absent = "failed" if required else "not_applicable"
    if not report:
        return when_absent
    verdict = report.get("compliant")
    if isinstance(verdict, bool):
        return "ok" if verdict else "failed"
    raw = report.get("status")
    if raw is None:
        return when_absent
    outcome = _normalize_result_status(str(raw), default="failed")
    return "failed" if (required and outcome == "not_applicable") else outcome
|
||||
|
||||
|
||||
def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Produce the status of every canonical check.

    A status recorded in ``summary["results"]`` under the canonical name wins.
    Otherwise each check falls back to related result names, the numeric
    inputs, or the external SonarQube / supply-chain reports.
    """
    entries = summary.get("results", []) if isinstance(summary, dict) else []
    recorded: dict[str, str] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = str(entry.get("name") or "").strip().lower()
        if name:
            recorded[name] = _normalize_result_status(entry.get("status"), default="failed")

    def _present(keys: list[str]) -> list[str]:
        """Return the recorded statuses for those of ``keys`` that exist."""
        return [recorded[key] for key in keys if key in recorded]

    tests_status = recorded.get("tests")
    if not tests_status:
        found = _present(["unit", "integration", "e2e", "pytest", "test", "tests"])
        if found:
            tests_status = _combine_statuses(found)
        elif tests["tests"] > 0:
            tests_status = "ok" if (tests["failures"] + tests["errors"]) == 0 else "failed"
        else:
            tests_status = "not_applicable"

    coverage_status = recorded.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"

    loc_status = recorded.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"

    docs_naming_status = recorded.get("docs_naming")
    if not docs_naming_status:
        found = _present(["docs", "hygiene", "smell", "lint", "naming"])
        docs_naming_status = _combine_statuses(found) if found else "not_applicable"

    gate_glue_status = recorded.get("gate_glue")
    if not gate_glue_status:
        found = _present(["gate_glue", "glue", "gate"])
        gate_glue_status = _combine_statuses(found) if found else "not_applicable"

    sonarqube_status = recorded.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = recorded.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )

    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }
|
||||
@ -1,173 +0,0 @@
|
||||
"""Build a titan-iac supply-chain compliance report from Trivy evidence."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
FAIL_SEVERITIES = {"HIGH", "CRITICAL"}
|
||||
|
||||
|
||||
def _read_json(path: Path) -> dict[str, Any]:
|
||||
"""Read a JSON object from disk for use as pipeline evidence."""
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError(f"{path} must contain a JSON object")
|
||||
return payload
|
||||
|
||||
|
||||
def _parse_day(raw: str | None) -> dt.date | None:
|
||||
"""Parse an ISO day while letting optional waiver dates stay optional."""
|
||||
if not raw:
|
||||
return None
|
||||
return dt.date.fromisoformat(raw)
|
||||
|
||||
|
||||
def _today(override: str | None = None) -> dt.date:
    """Return the policy day: the parsed ``override`` if given, else today's date."""
    pinned = _parse_day(override)
    return pinned or dt.date.today()
|
||||
|
||||
|
||||
def _load_waiver_pairs(path: Path | None, policy_day: dt.date) -> tuple[set[tuple[str, str]], int]:
    """Load misconfiguration waivers from ``path``.

    Returns the active ``(misconfiguration id, target)`` pairs and the number
    of targets whose waiver expired before ``policy_day``. A missing file
    gives an empty set and zero.
    """
    if path is None or not path.exists():
        return set(), 0

    document = _read_json(path)
    fallback_expiry = document.get("default_expires_at")
    active: set[tuple[str, str]] = set()
    expired_total = 0

    for waiver in document.get("misconfigurations", []):
        if not isinstance(waiver, dict):
            continue
        rule_id = str(waiver.get("id") or "").strip()
        if not rule_id:
            continue
        expiry = _parse_day(str(waiver.get("expires_at") or fallback_expiry or ""))
        targets = waiver.get("targets", [])
        if not isinstance(targets, list):
            continue

        if expiry and expiry < policy_day:
            expired_total += len(targets)
            continue

        # Waivers apply per target, so a finding on a new target is not
        # covered by an existing entry for the same rule.
        active.update((rule_id, target) for target in targets if isinstance(target, str) and target)

    return active, expired_total
|
||||
|
||||
|
||||
def _iter_failed_misconfigurations(payload: dict[str, Any]):
    """Yield ``(target, item)`` for each failed high/critical misconfiguration.

    Entries that are not mappings are skipped. A missing or null ``Results``
    or ``Misconfigurations`` value is treated as an empty list.
    """
    for result in payload.get("Results") or []:
        if not isinstance(result, dict):
            continue
        target = str(result.get("Target") or "")
        for item in result.get("Misconfigurations") or []:
            if not isinstance(item, dict):
                continue
            if item.get("Status") != "FAIL":
                continue
            if str(item.get("Severity") or "").upper() not in FAIL_SEVERITIES:
                continue
            yield target, item
|
||||
|
||||
|
||||
def _count_vulnerabilities(payload: dict[str, Any], severity: str) -> int:
|
||||
"""Count Trivy vulnerabilities at a specific severity."""
|
||||
count = 0
|
||||
for result in payload.get("Results", []):
|
||||
if not isinstance(result, dict):
|
||||
continue
|
||||
for item in result.get("Vulnerabilities") or []:
|
||||
if isinstance(item, dict) and str(item.get("Severity") or "").upper() == severity:
|
||||
count += 1
|
||||
return count
|
||||
|
||||
|
||||
def _count_secrets(payload: dict[str, Any]) -> int:
|
||||
"""Count detected secrets in the Trivy filesystem report."""
|
||||
count = 0
|
||||
for result in payload.get("Results", []):
|
||||
if isinstance(result, dict):
|
||||
count += len(result.get("Secrets") or [])
|
||||
return count
|
||||
|
||||
|
||||
def build_report(
    trivy_payload: dict[str, Any],
    waiver_path: Path | None = None,
    today_override: str | None = None,
) -> dict[str, Any]:
    """Build the compliance summary consumed by the quality gate.

    Failed misconfigurations are split into waived (their ``(ID, target)``
    pair is in the active waiver set) and open. The report status is ``"ok"``
    only when there are no critical vulnerabilities, no secrets and no open
    misconfigurations; high vulnerabilities are reported but do not affect
    the status.
    """
    active_waivers, expired_waivers = _load_waiver_pairs(waiver_path, _today(today_override))

    unwaived: list[dict[str, str]] = []
    waived_total = 0
    for target, finding in _iter_failed_misconfigurations(trivy_payload):
        finding_id = str(finding.get("ID") or "")
        if (finding_id, target) not in active_waivers:
            unwaived.append(
                {
                    "id": finding_id,
                    "target": target,
                    "severity": str(finding.get("Severity") or ""),
                    "title": str(finding.get("Title") or ""),
                }
            )
        else:
            waived_total += 1

    critical_count = _count_vulnerabilities(trivy_payload, "CRITICAL")
    high_count = _count_vulnerabilities(trivy_payload, "HIGH")
    secret_count = _count_secrets(trivy_payload)

    passed = critical_count == 0 and secret_count == 0 and not unwaived
    verdict = "ok" if passed else "failed"

    return {
        "status": verdict,
        "compliant": verdict == "ok",
        "category": "artifact_security",
        "scan_type": "filesystem",
        "scanner": "trivy",
        "critical_vulnerabilities": critical_count,
        "high_vulnerabilities": high_count,
        "high_vulnerability_policy": "observe",
        "secrets": secret_count,
        "high_or_critical_misconfigurations": len(unwaived),
        "waived_misconfigurations": waived_total,
        "expired_waivers": expired_waivers,
        "waiver_file": str(waiver_path) if waiver_path else "",
        # Only the first 20 open findings are embedded as examples.
        "open_misconfiguration_examples": unwaived[:20],
    }
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """CLI entrypoint used by Jenkins after the Trivy scan completes.

    Reads the Trivy JSON named by ``--trivy-json``, builds the report (with
    the optional ``--waivers`` file and ``--today`` override) and writes it
    as sorted, indented JSON to ``--output``, creating parent directories as
    needed. Always returns 0.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, options in (
        ("--trivy-json", {"required": True}),
        ("--waivers", {}),
        ("--output", {"required": True}),
        ("--today", {}),
    ):
        cli.add_argument(flag, **options)
    opts = cli.parse_args(argv)

    report = build_report(
        _read_json(Path(opts.trivy_json)),
        waiver_path=Path(opts.waivers) if opts.waivers else None,
        today_override=opts.today,
    )

    destination = Path(opts.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    rendered = json.dumps(report, indent=2, sort_keys=True) + "\n"
    destination.write_text(rendered, encoding="utf-8")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__": # pragma: no cover
|
||||
raise SystemExit(main())
|
||||
@ -1,7 +1,6 @@
|
||||
max_success_age_hours: 48
|
||||
allow_suspended:
|
||||
- bstein-dev-home/vaultwarden-cred-sync
|
||||
- comms/guest-name-randomizer
|
||||
- comms/othrys-room-reset
|
||||
- comms/pin-othrys-invite
|
||||
- comms/seed-othrys-room
|
||||
@ -10,7 +9,6 @@ allow_suspended:
|
||||
- health/wger-user-sync
|
||||
- mailu-mailserver/mailu-sync-nightly
|
||||
- nextcloud/nextcloud-mail-sync
|
||||
- vault/vault-oidc-config
|
||||
ariadne_schedule_tasks:
|
||||
- schedule.mailu_sync
|
||||
- schedule.nextcloud_sync
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
"""Glue checks for Ariadne schedules exported to VictoriaMetrics."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
import requests
|
||||
import yaml
|
||||
|
||||
|
||||
CONFIG_PATH = Path(__file__).with_name("config.yaml")
|
||||
|
||||
|
||||
def _load_config() -> dict:
    """Parse the YAML file at ``CONFIG_PATH``; a falsy document yields ``{}``."""
    with CONFIG_PATH.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
        return parsed if parsed else {}
|
||||
|
||||
|
||||
def _query(promql: str) -> list[dict]:
    """Run ``promql`` against ``/api/v1/query`` and return ``data.result``.

    The base URL comes from the ``VM_URL`` environment variable (trailing
    slashes stripped), falling back to the in-cluster VictoriaMetrics
    address. HTTP error statuses raise via ``raise_for_status``; the request
    uses a 10 second timeout. Missing ``data`` / ``result`` keys yield ``[]``.
    """
    base_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    endpoint = f"{base_url}/api/v1/query"
    reply = requests.get(endpoint, params={"query": promql}, timeout=10)
    reply.raise_for_status()
    body = reply.json()
    data = body.get("data", {})
    return data.get("result", [])
|
||||
|
||||
|
||||
def _expected_tasks() -> list[dict]:
    """Return the normalized ``ariadne_schedule_tasks`` entries from the config.

    Each entry is passed through ``_normalize_task``; asserts that at least
    one task is configured.
    """
    config = _load_config()
    normalized = []
    for entry in config.get("ariadne_schedule_tasks", []):
        normalized.append(_normalize_task(entry, config))
    assert normalized, "No Ariadne schedule tasks configured"
    return normalized
|
||||
|
||||
|
||||
def _normalize_task(item: object, cfg: dict) -> dict:
|
||||
if isinstance(item, str):
|
||||
return {
|
||||
"task": item,
|
||||
"check_last_success": True,
|
||||
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
|
||||
}
|
||||
if isinstance(item, dict):
|
||||
normalized = dict(item)
|
||||
normalized.setdefault("check_last_success", True)
|
||||
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
|
||||
return normalized
|
||||
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
|
||||
|
||||
|
||||
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
|
||||
tracked = [item for item in tasks if item.get("check_last_success")]
|
||||
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
|
||||
return tracked
|
||||
|
||||
|
||||
def _task_regex(tasks: list[dict]) -> str:
|
||||
return "|".join(item["task"] for item in tasks)
|
||||
|
||||
|
||||
def test_ariadne_schedule_series_exist():
    """Every configured task must expose a next-run timestamp series."""
    expected = _expected_tasks()
    pattern = _task_regex(expected)
    results = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{pattern}"}}')
    reported = set()
    for sample in results:
        reported.add(sample.get("metric", {}).get("task"))
    absent = [entry["task"] for entry in expected if entry["task"] not in reported]
    assert not absent, f"Missing next-run metrics for: {', '.join(absent)}"
|
||||
|
||||
|
||||
def test_ariadne_schedule_recent_success():
    """Tracked tasks must report a last success no older than their limit."""
    tracked = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tracked)
    results = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{pattern}"}}')
    reported = {sample.get("metric", {}).get("task") for sample in results}
    absent = [entry["task"] for entry in tracked if entry["task"] not in reported]
    assert not absent, f"Missing last-success metrics for: {', '.join(absent)}"

    reference = datetime.now(timezone.utc)
    # Age in hours of the last success per task, from each sample's value.
    hours_since = {}
    for sample in results:
        name = sample.get("metric", {}).get("task")
        succeeded_at = datetime.fromtimestamp(float(sample["value"][1]), tz=timezone.utc)
        hours_since[name] = (reference - succeeded_at).total_seconds() / 3600

    stale = []
    for entry in tracked:
        name = entry["task"]
        if name not in hours_since:
            continue
        if hours_since[name] > float(entry["max_success_age_hours"]):
            stale.append(f"{name} ({hours_since[name]:.1f}h > {entry['max_success_age_hours']}h)")
    assert not stale, "Ariadne schedules are stale: " + ", ".join(stale)
|
||||
|
||||
|
||||
def test_ariadne_schedule_last_status_present_and_boolean():
    """Tracked tasks must expose a last-status series whose value is 0 or 1."""
    tracked = _tracked_tasks(_expected_tasks())
    pattern = _task_regex(tracked)
    results = _query(f'ariadne_schedule_last_status{{task=~"{pattern}"}}')
    reported = {sample.get("metric", {}).get("task") for sample in results}
    absent = [entry["task"] for entry in tracked if entry["task"] not in reported]
    assert not absent, f"Missing last-status metrics for: {', '.join(absent)}"

    readings = [
        (sample.get("metric", {}).get("task"), float(sample["value"][1]))
        for sample in results
    ]
    invalid = [f"{name}={reading}" for name, reading in readings if reading not in (0.0, 1.0)]
    assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"
|
||||
@ -1,5 +1,3 @@
|
||||
"""Glue checks for the metrics the quality-gate publishes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
@ -25,63 +23,26 @@ def _query(promql: str) -> list[dict]:
|
||||
return payload.get("data", {}).get("result", [])
|
||||
|
||||
|
||||
def _expected_tasks() -> list[dict]:
|
||||
cfg = _load_config()
|
||||
tasks = [
|
||||
_normalize_task(item, cfg)
|
||||
for item in cfg.get("ariadne_schedule_tasks", [])
|
||||
]
|
||||
assert tasks, "No Ariadne schedule tasks configured"
|
||||
return tasks
|
||||
def test_glue_metrics_present():
|
||||
series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
|
||||
assert series, "No glue cronjob label series found"
|
||||
|
||||
|
||||
def _normalize_task(item: object, cfg: dict) -> dict:
|
||||
if isinstance(item, str):
|
||||
return {
|
||||
"task": item,
|
||||
"check_last_success": True,
|
||||
"max_success_age_hours": cfg.get("max_success_age_hours", 48),
|
||||
}
|
||||
if isinstance(item, dict):
|
||||
normalized = dict(item)
|
||||
normalized.setdefault("check_last_success", True)
|
||||
normalized.setdefault("max_success_age_hours", cfg.get("max_success_age_hours", 48))
|
||||
return normalized
|
||||
raise TypeError(f"Unsupported Ariadne schedule task config entry: {item!r}")
|
||||
|
||||
|
||||
def _tracked_tasks(tasks: list[dict]) -> list[dict]:
|
||||
tracked = [item for item in tasks if item.get("check_last_success")]
|
||||
assert tracked, "No Ariadne schedule tasks are marked for success tracking"
|
||||
return tracked
|
||||
|
||||
|
||||
def _task_regex(tasks: list[dict]) -> str:
|
||||
return "|".join(item["task"] for item in tasks)
|
||||
def test_glue_metrics_success_join():
|
||||
query = (
|
||||
"kube_cronjob_status_last_successful_time "
|
||||
'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
|
||||
)
|
||||
series = _query(query)
|
||||
assert series, "No glue cronjob last success series found"
|
||||
|
||||
|
||||
def test_ariadne_schedule_metrics_present():
|
||||
tasks = _expected_tasks()
|
||||
selector = _task_regex(tasks)
|
||||
series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
|
||||
seen = {item.get("metric", {}).get("task") for item in series}
|
||||
missing = [item["task"] for item in tasks if item["task"] not in seen]
|
||||
cfg = _load_config()
|
||||
expected = cfg.get("ariadne_schedule_tasks", [])
|
||||
if not expected:
|
||||
return
|
||||
series = _query("ariadne_schedule_next_run_timestamp_seconds")
|
||||
tasks = {item.get("metric", {}).get("task") for item in series}
|
||||
missing = [task for task in expected if task not in tasks]
|
||||
assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
|
||||
|
||||
|
||||
def test_ariadne_schedule_success_and_status_metrics_present():
|
||||
tasks = _tracked_tasks(_expected_tasks())
|
||||
selector = _task_regex(tasks)
|
||||
|
||||
success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
|
||||
status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
|
||||
|
||||
success_tasks = {item.get("metric", {}).get("task") for item in success}
|
||||
status_tasks = {item.get("metric", {}).get("task") for item in status}
|
||||
expected = {item["task"] for item in tasks}
|
||||
|
||||
missing_success = sorted(expected - success_tasks)
|
||||
missing_status = sorted(expected - status_tasks)
|
||||
|
||||
assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
|
||||
assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"
|
||||
|
||||
@ -1,401 +0,0 @@
|
||||
{
|
||||
"version": 1,
|
||||
"generated_from": "Jenkins titan-iac build 225 Trivy filesystem scan",
|
||||
"default_expires_at": "2026-05-22",
|
||||
"ticket": "atlas-quality-wave-k8s-hardening",
|
||||
"default_reason": "Existing Kubernetes manifest hardening baseline accepted only for the first quality-gate rollout; fix or renew explicitly before expiry.",
|
||||
"misconfigurations": [
|
||||
{
|
||||
"id": "DS-0002",
|
||||
"targets": [
|
||||
"dockerfiles/Dockerfile.ananke-node-helper"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0009",
|
||||
"targets": [
|
||||
"services/mailu/vip-controller.yaml",
|
||||
"services/maintenance/k3s-agent-restart-daemonset.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0010",
|
||||
"targets": [
|
||||
"services/maintenance/k3s-agent-restart-daemonset.yaml",
|
||||
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
|
||||
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
|
||||
"services/monitoring/jetson-tegrastats-exporter.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0014",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
|
||||
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
|
||||
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
|
||||
"infrastructure/postgres/statefulset.yaml",
|
||||
"infrastructure/vault-csi/vault-csi-provider.yaml",
|
||||
"services/ai-llm/deployment.yaml",
|
||||
"services/bstein-dev-home/backend-deployment.yaml",
|
||||
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
|
||||
"services/bstein-dev-home/frontend-deployment.yaml",
|
||||
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
|
||||
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
|
||||
"services/bstein-dev-home/vault-sync-deployment.yaml",
|
||||
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
|
||||
"services/comms/atlasbot-deployment.yaml",
|
||||
"services/comms/coturn.yaml",
|
||||
"services/comms/element-call-deployment.yaml",
|
||||
"services/comms/guest-name-job.yaml",
|
||||
"services/comms/guest-register-deployment.yaml",
|
||||
"services/comms/livekit-token-deployment.yaml",
|
||||
"services/comms/livekit.yaml",
|
||||
"services/comms/mas-deployment.yaml",
|
||||
"services/comms/oneoffs/bstein-force-leave-job.yaml",
|
||||
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-db-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
|
||||
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
|
||||
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-user-seed-job.yaml",
|
||||
"services/comms/pin-othrys-job.yaml",
|
||||
"services/comms/reset-othrys-room-job.yaml",
|
||||
"services/comms/seed-othrys-room.yaml",
|
||||
"services/comms/vault-sync-deployment.yaml",
|
||||
"services/comms/wellknown.yaml",
|
||||
"services/crypto/monerod/deployment.yaml",
|
||||
"services/crypto/wallet-monero-temp/deployment.yaml",
|
||||
"services/crypto/xmr-miner/deployment.yaml",
|
||||
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
|
||||
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
|
||||
"services/finance/actual-budget-deployment.yaml",
|
||||
"services/finance/firefly-cronjob.yaml",
|
||||
"services/finance/firefly-deployment.yaml",
|
||||
"services/finance/firefly-user-sync-cronjob.yaml",
|
||||
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
|
||||
"services/gitea/deployment.yaml",
|
||||
"services/harbor/vault-sync-deployment.yaml",
|
||||
"services/health/wger-admin-ensure-cronjob.yaml",
|
||||
"services/health/wger-deployment.yaml",
|
||||
"services/health/wger-user-sync-cronjob.yaml",
|
||||
"services/jellyfin/deployment.yaml",
|
||||
"services/jellyfin/loader.yaml",
|
||||
"services/jenkins/deployment.yaml",
|
||||
"services/jenkins/vault-sync-deployment.yaml",
|
||||
"services/keycloak/deployment.yaml",
|
||||
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
|
||||
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/realm-settings-job.yaml",
|
||||
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/user-overrides-job.yaml",
|
||||
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/vault-sync-deployment.yaml",
|
||||
"services/logging/node-image-gc-rpi4-daemonset.yaml",
|
||||
"services/logging/node-image-prune-rpi5-daemonset.yaml",
|
||||
"services/logging/node-log-rotation-daemonset.yaml",
|
||||
"services/logging/oauth2-proxy.yaml",
|
||||
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
|
||||
"services/logging/oneoffs/opensearch-ism-job.yaml",
|
||||
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
|
||||
"services/logging/opensearch-prune-cronjob.yaml",
|
||||
"services/logging/vault-sync-deployment.yaml",
|
||||
"services/mailu/mailu-sync-cronjob.yaml",
|
||||
"services/mailu/mailu-sync-listener.yaml",
|
||||
"services/mailu/oneoffs/mailu-sync-job.yaml",
|
||||
"services/mailu/vault-sync-deployment.yaml",
|
||||
"services/mailu/vip-controller.yaml",
|
||||
"services/maintenance/ariadne-deployment.yaml",
|
||||
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
|
||||
"services/maintenance/image-sweeper-cronjob.yaml",
|
||||
"services/maintenance/k3s-agent-restart-daemonset.yaml",
|
||||
"services/maintenance/metis-deployment.yaml",
|
||||
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
|
||||
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
|
||||
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
|
||||
"services/maintenance/node-image-sweeper-daemonset.yaml",
|
||||
"services/maintenance/node-nofile-daemonset.yaml",
|
||||
"services/maintenance/oauth2-proxy-metis.yaml",
|
||||
"services/maintenance/oauth2-proxy-soteria.yaml",
|
||||
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
|
||||
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
|
||||
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
|
||||
"services/maintenance/pod-cleaner-cronjob.yaml",
|
||||
"services/maintenance/soteria-deployment.yaml",
|
||||
"services/maintenance/vault-sync-deployment.yaml",
|
||||
"services/monitoring/dcgm-exporter.yaml",
|
||||
"services/monitoring/jetson-tegrastats-exporter.yaml",
|
||||
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
|
||||
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
|
||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||
"services/monitoring/vault-sync-deployment.yaml",
|
||||
"services/nextcloud-mail-sync/cronjob.yaml",
|
||||
"services/nextcloud/collabora.yaml",
|
||||
"services/nextcloud/cronjob.yaml",
|
||||
"services/nextcloud/deployment.yaml",
|
||||
"services/nextcloud/maintenance-cronjob.yaml",
|
||||
"services/oauth2-proxy/deployment.yaml",
|
||||
"services/openldap/statefulset.yaml",
|
||||
"services/outline/deployment.yaml",
|
||||
"services/outline/redis-deployment.yaml",
|
||||
"services/pegasus/deployment.yaml",
|
||||
"services/pegasus/vault-sync-deployment.yaml",
|
||||
"services/planka/deployment.yaml",
|
||||
"services/quality/oauth2-proxy-sonarqube.yaml",
|
||||
"services/quality/sonarqube-deployment.yaml",
|
||||
"services/quality/sonarqube-exporter-deployment.yaml",
|
||||
"services/sui-metrics/base/deployment.yaml",
|
||||
"services/typhon/vault-sync-deployment.yaml",
|
||||
"services/vault/k8s-auth-config-cronjob.yaml",
|
||||
"services/vault/oidc-config-cronjob.yaml",
|
||||
"services/vault/statefulset.yaml",
|
||||
"services/vaultwarden/deployment.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0017",
|
||||
"targets": [
|
||||
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
|
||||
"services/logging/node-image-gc-rpi4-daemonset.yaml",
|
||||
"services/logging/node-image-prune-rpi5-daemonset.yaml",
|
||||
"services/logging/node-log-rotation-daemonset.yaml",
|
||||
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
|
||||
"services/maintenance/image-sweeper-cronjob.yaml",
|
||||
"services/maintenance/k3s-agent-restart-daemonset.yaml",
|
||||
"services/maintenance/metis-deployment.yaml",
|
||||
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
|
||||
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
|
||||
"services/maintenance/node-image-sweeper-daemonset.yaml",
|
||||
"services/maintenance/node-nofile-daemonset.yaml",
|
||||
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
|
||||
"services/monitoring/dcgm-exporter.yaml",
|
||||
"services/monitoring/jetson-tegrastats-exporter.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0041",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
|
||||
"infrastructure/traefik/clusterrole.yaml",
|
||||
"services/bstein-dev-home/rbac.yaml",
|
||||
"services/comms/comms-secrets-ensure-rbac.yaml",
|
||||
"services/comms/mas-db-ensure-rbac.yaml",
|
||||
"services/comms/mas-secrets-ensure-rbac.yaml",
|
||||
"services/maintenance/soteria-rbac.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0047",
|
||||
"targets": [
|
||||
"services/monitoring/rbac.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0053",
|
||||
"targets": [
|
||||
"services/comms/comms-secrets-ensure-rbac.yaml",
|
||||
"services/comms/mas-db-ensure-rbac.yaml",
|
||||
"services/jenkins/serviceaccount.yaml",
|
||||
"services/maintenance/ariadne-rbac.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0056",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml",
|
||||
"services/jenkins/serviceaccount.yaml",
|
||||
"services/maintenance/disable-k3s-traefik-rbac.yaml",
|
||||
"services/maintenance/k3s-traefik-cleanup-rbac.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0114",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0118",
|
||||
"targets": [
|
||||
"infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml",
|
||||
"infrastructure/core/coredns-deployment.yaml",
|
||||
"infrastructure/core/ntp-sync-daemonset.yaml",
|
||||
"infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml",
|
||||
"infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml",
|
||||
"infrastructure/longhorn/core/vault-sync-deployment.yaml",
|
||||
"infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-jetson/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-minipc/daemonset.yaml",
|
||||
"infrastructure/modules/profiles/components/device-plugin-tethys/daemonset.yaml",
|
||||
"infrastructure/postgres/statefulset.yaml",
|
||||
"infrastructure/vault-csi/vault-csi-provider.yaml",
|
||||
"services/ai-llm/deployment.yaml",
|
||||
"services/bstein-dev-home/backend-deployment.yaml",
|
||||
"services/bstein-dev-home/chat-ai-gateway-deployment.yaml",
|
||||
"services/bstein-dev-home/frontend-deployment.yaml",
|
||||
"services/bstein-dev-home/oneoffs/migrations/portal-migrate-job.yaml",
|
||||
"services/bstein-dev-home/oneoffs/portal-onboarding-e2e-test-job.yaml",
|
||||
"services/bstein-dev-home/vault-sync-deployment.yaml",
|
||||
"services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml",
|
||||
"services/comms/atlasbot-deployment.yaml",
|
||||
"services/comms/coturn.yaml",
|
||||
"services/comms/element-call-deployment.yaml",
|
||||
"services/comms/guest-name-job.yaml",
|
||||
"services/comms/livekit-token-deployment.yaml",
|
||||
"services/comms/livekit.yaml",
|
||||
"services/comms/mas-deployment.yaml",
|
||||
"services/comms/oneoffs/bstein-force-leave-job.yaml",
|
||||
"services/comms/oneoffs/comms-secrets-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-admin-client-secret-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-db-ensure-job.yaml",
|
||||
"services/comms/oneoffs/mas-local-users-ensure-job.yaml",
|
||||
"services/comms/oneoffs/othrys-kick-numeric-job.yaml",
|
||||
"services/comms/oneoffs/synapse-admin-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-seeder-admin-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-signingkey-ensure-job.yaml",
|
||||
"services/comms/oneoffs/synapse-user-seed-job.yaml",
|
||||
"services/comms/pin-othrys-job.yaml",
|
||||
"services/comms/reset-othrys-room-job.yaml",
|
||||
"services/comms/seed-othrys-room.yaml",
|
||||
"services/comms/vault-sync-deployment.yaml",
|
||||
"services/comms/wellknown.yaml",
|
||||
"services/crypto/monerod/deployment.yaml",
|
||||
"services/crypto/wallet-monero-temp/deployment.yaml",
|
||||
"services/crypto/xmr-miner/deployment.yaml",
|
||||
"services/crypto/xmr-miner/vault-sync-deployment.yaml",
|
||||
"services/crypto/xmr-miner/xmrig-daemonset.yaml",
|
||||
"services/finance/firefly-cronjob.yaml",
|
||||
"services/finance/firefly-deployment.yaml",
|
||||
"services/finance/firefly-user-sync-cronjob.yaml",
|
||||
"services/finance/oneoffs/finance-secrets-ensure-job.yaml",
|
||||
"services/gitea/deployment.yaml",
|
||||
"services/harbor/vault-sync-deployment.yaml",
|
||||
"services/health/wger-admin-ensure-cronjob.yaml",
|
||||
"services/health/wger-deployment.yaml",
|
||||
"services/health/wger-user-sync-cronjob.yaml",
|
||||
"services/jellyfin/loader.yaml",
|
||||
"services/jenkins/deployment.yaml",
|
||||
"services/jenkins/vault-sync-deployment.yaml",
|
||||
"services/keycloak/oneoffs/actual-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/harbor-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/ldap-federation-job.yaml",
|
||||
"services/keycloak/oneoffs/logs-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/mas-secrets-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/metis-ssh-keys-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-admin-client-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-client-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-execute-actions-email-test-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-target-client-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-token-exchange-permissions-job.yaml",
|
||||
"services/keycloak/oneoffs/portal-e2e-token-exchange-test-job.yaml",
|
||||
"services/keycloak/oneoffs/quality-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/realm-settings-job.yaml",
|
||||
"services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/synapse-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/oneoffs/user-overrides-job.yaml",
|
||||
"services/keycloak/oneoffs/vault-oidc-secret-ensure-job.yaml",
|
||||
"services/keycloak/vault-sync-deployment.yaml",
|
||||
"services/logging/node-image-gc-rpi4-daemonset.yaml",
|
||||
"services/logging/node-image-prune-rpi5-daemonset.yaml",
|
||||
"services/logging/node-log-rotation-daemonset.yaml",
|
||||
"services/logging/oauth2-proxy.yaml",
|
||||
"services/logging/oneoffs/opensearch-dashboards-setup-job.yaml",
|
||||
"services/logging/oneoffs/opensearch-ism-job.yaml",
|
||||
"services/logging/oneoffs/opensearch-observability-setup-job.yaml",
|
||||
"services/logging/opensearch-prune-cronjob.yaml",
|
||||
"services/logging/vault-sync-deployment.yaml",
|
||||
"services/mailu/mailu-sync-cronjob.yaml",
|
||||
"services/mailu/mailu-sync-listener.yaml",
|
||||
"services/mailu/oneoffs/mailu-sync-job.yaml",
|
||||
"services/mailu/vault-sync-deployment.yaml",
|
||||
"services/mailu/vip-controller.yaml",
|
||||
"services/maintenance/ariadne-deployment.yaml",
|
||||
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
|
||||
"services/maintenance/image-sweeper-cronjob.yaml",
|
||||
"services/maintenance/k3s-agent-restart-daemonset.yaml",
|
||||
"services/maintenance/metis-deployment.yaml",
|
||||
"services/maintenance/metis-k3s-token-sync-cronjob.yaml",
|
||||
"services/maintenance/metis-sentinel-amd64-daemonset.yaml",
|
||||
"services/maintenance/metis-sentinel-arm64-daemonset.yaml",
|
||||
"services/maintenance/node-image-sweeper-daemonset.yaml",
|
||||
"services/maintenance/node-nofile-daemonset.yaml",
|
||||
"services/maintenance/oauth2-proxy-metis.yaml",
|
||||
"services/maintenance/oauth2-proxy-soteria.yaml",
|
||||
"services/maintenance/oneoffs/ariadne-migrate-job.yaml",
|
||||
"services/maintenance/oneoffs/k3s-traefik-cleanup-job.yaml",
|
||||
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml",
|
||||
"services/maintenance/pod-cleaner-cronjob.yaml",
|
||||
"services/maintenance/soteria-deployment.yaml",
|
||||
"services/maintenance/vault-sync-deployment.yaml",
|
||||
"services/monitoring/dcgm-exporter.yaml",
|
||||
"services/monitoring/jetson-tegrastats-exporter.yaml",
|
||||
"services/monitoring/oneoffs/grafana-org-bootstrap.yaml",
|
||||
"services/monitoring/oneoffs/grafana-user-dedupe-job.yaml",
|
||||
"services/monitoring/platform-quality-gateway-deployment.yaml",
|
||||
"services/monitoring/platform-quality-suite-probe-cronjob.yaml",
|
||||
"services/monitoring/postmark-exporter-deployment.yaml",
|
||||
"services/monitoring/vault-sync-deployment.yaml",
|
||||
"services/nextcloud/collabora.yaml",
|
||||
"services/oauth2-proxy/deployment.yaml",
|
||||
"services/openldap/statefulset.yaml",
|
||||
"services/outline/deployment.yaml",
|
||||
"services/outline/redis-deployment.yaml",
|
||||
"services/pegasus/vault-sync-deployment.yaml",
|
||||
"services/quality/oauth2-proxy-sonarqube.yaml",
|
||||
"services/quality/sonarqube-deployment.yaml",
|
||||
"services/quality/sonarqube-exporter-deployment.yaml",
|
||||
"services/sui-metrics/base/deployment.yaml",
|
||||
"services/sui-metrics/overlays/atlas/patch-node-selector.yaml",
|
||||
"services/typhon/deployment.yaml",
|
||||
"services/typhon/vault-sync-deployment.yaml",
|
||||
"services/vault/k8s-auth-config-cronjob.yaml",
|
||||
"services/vault/oidc-config-cronjob.yaml",
|
||||
"services/vaultwarden/deployment.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "KSV-0121",
|
||||
"targets": [
|
||||
"services/logging/node-image-gc-rpi4-daemonset.yaml",
|
||||
"services/logging/node-image-prune-rpi5-daemonset.yaml",
|
||||
"services/logging/node-log-rotation-daemonset.yaml",
|
||||
"services/maintenance/disable-k3s-traefik-daemonset.yaml",
|
||||
"services/maintenance/image-sweeper-cronjob.yaml",
|
||||
"services/maintenance/metis-deployment.yaml",
|
||||
"services/maintenance/node-image-sweeper-daemonset.yaml",
|
||||
"services/maintenance/node-nofile-daemonset.yaml",
|
||||
"services/maintenance/oneoffs/titan-24-rootfs-sweep-job.yaml"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -0,0 +1,26 @@
|
||||
# clusters/atlas/flux-system/applications/atlasbot/image-automation.yaml
|
||||
apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: ai
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: feature/atlasbot
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(atlasbot): automated image update"
|
||||
push:
|
||||
branch: feature/atlasbot
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/atlasbot
|
||||
@ -0,0 +1,17 @@
|
||||
# clusters/atlas/flux-system/applications/atlasbot/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
path: ./services/atlasbot
|
||||
targetNamespace: ai
|
||||
timeout: 2m
|
||||
dependsOn:
|
||||
- name: ai-llm
|
||||
@ -13,14 +13,14 @@ spec:
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: main
|
||||
branch: feature/atlasbot
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(bstein-dev-home): automated image update"
|
||||
push:
|
||||
branch: main
|
||||
branch: feature/atlasbot
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/bstein-dev-home
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
# clusters/atlas/flux-system/applications/comms/image-automation.yaml
|
||||
apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: comms
|
||||
namespace: comms
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: feature/atlasbot
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(comms): automated image update"
|
||||
push:
|
||||
branch: feature/atlasbot
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/comms
|
||||
@ -13,8 +13,4 @@ spec:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
dependsOn:
|
||||
- name: longhorn
|
||||
- name: vault
|
||||
- name: postgres
|
||||
wait: true
|
||||
|
||||
@ -16,6 +16,3 @@ spec:
|
||||
wait: false
|
||||
dependsOn:
|
||||
- name: core
|
||||
- name: longhorn
|
||||
- name: vault
|
||||
- name: postgres
|
||||
|
||||
@ -25,4 +25,3 @@ spec:
|
||||
name: jenkins
|
||||
namespace: jenkins
|
||||
wait: false
|
||||
timeout: 20m
|
||||
|
||||
@ -12,8 +12,4 @@ spec:
|
||||
name: flux-system
|
||||
path: ./services/keycloak
|
||||
targetNamespace: sso
|
||||
dependsOn:
|
||||
- name: longhorn
|
||||
- name: vault
|
||||
- name: postgres
|
||||
timeout: 2m
|
||||
|
||||
@ -6,6 +6,9 @@ resources:
|
||||
- vault/kustomization.yaml
|
||||
- vaultwarden/kustomization.yaml
|
||||
- comms/kustomization.yaml
|
||||
- comms/image-automation.yaml
|
||||
- atlasbot/kustomization.yaml
|
||||
- atlasbot/image-automation.yaml
|
||||
- crypto/kustomization.yaml
|
||||
- monerod/kustomization.yaml
|
||||
- pegasus/kustomization.yaml
|
||||
@ -21,12 +24,10 @@ resources:
|
||||
- sui-metrics/kustomization.yaml
|
||||
- openldap/kustomization.yaml
|
||||
- keycloak/kustomization.yaml
|
||||
- quality/kustomization.yaml
|
||||
- oauth2-proxy/kustomization.yaml
|
||||
- mailu/kustomization.yaml
|
||||
- jenkins/kustomization.yaml
|
||||
- ai-llm/kustomization.yaml
|
||||
- typhon/kustomization.yaml
|
||||
- nextcloud/kustomization.yaml
|
||||
- nextcloud-mail-sync/kustomization.yaml
|
||||
- outline/kustomization.yaml
|
||||
|
||||
@ -16,4 +16,4 @@ spec:
|
||||
dependsOn:
|
||||
- name: crypto
|
||||
wait: true
|
||||
timeout: 15m
|
||||
timeout: 5m
|
||||
|
||||
@ -1,35 +0,0 @@
|
||||
# clusters/atlas/flux-system/applications/quality/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: quality
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/quality
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: quality
|
||||
dependsOn:
|
||||
- name: traefik
|
||||
- name: cert-manager
|
||||
- name: keycloak
|
||||
- name: vault
|
||||
- name: postgres
|
||||
healthChecks:
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: sonarqube
|
||||
namespace: quality
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: sonarqube-exporter
|
||||
namespace: quality
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: oauth2-proxy-sonarqube
|
||||
namespace: quality
|
||||
wait: false
|
||||
timeout: 20m
|
||||
@ -1,29 +0,0 @@
|
||||
# clusters/atlas/flux-system/applications/typhon/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: typhon
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./services/typhon
|
||||
prune: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: climate
|
||||
dependsOn:
|
||||
- name: vault
|
||||
- name: vault-csi
|
||||
- name: monitoring
|
||||
healthChecks:
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: typhon
|
||||
namespace: climate
|
||||
- apiVersion: v1
|
||||
kind: Service
|
||||
name: typhon
|
||||
namespace: climate
|
||||
wait: false
|
||||
timeout: 20m
|
||||
@ -15,5 +15,4 @@ spec:
|
||||
prune: true
|
||||
wait: true
|
||||
dependsOn:
|
||||
- name: longhorn
|
||||
- name: helm
|
||||
|
||||
@ -17,4 +17,3 @@ spec:
|
||||
- name: crypto
|
||||
- name: monerod
|
||||
wait: true
|
||||
timeout: 30m
|
||||
|
||||
@ -9,7 +9,7 @@ metadata:
|
||||
spec:
|
||||
interval: 1m0s
|
||||
ref:
|
||||
branch: main
|
||||
branch: feature/atlasbot
|
||||
secretRef:
|
||||
name: flux-system-gitea
|
||||
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
||||
|
||||
@ -16,5 +16,6 @@ resources:
|
||||
- longhorn/kustomization.yaml
|
||||
- longhorn-ui/kustomization.yaml
|
||||
- postgres/kustomization.yaml
|
||||
- nats/kustomization.yaml
|
||||
- ../platform/vault-csi/kustomization.yaml
|
||||
- ../platform/vault-injector/kustomization.yaml
|
||||
|
||||
@ -13,14 +13,14 @@ spec:
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: main
|
||||
branch: feature/atlasbot
|
||||
commit:
|
||||
author:
|
||||
email: ops@bstein.dev
|
||||
name: flux-bot
|
||||
messageTemplate: "chore(maintenance): automated image update"
|
||||
push:
|
||||
branch: main
|
||||
branch: feature/atlasbot
|
||||
update:
|
||||
strategy: Setters
|
||||
path: services/maintenance
|
||||
|
||||
21
clusters/atlas/flux-system/platform/nats/kustomization.yaml
Normal file
21
clusters/atlas/flux-system/platform/nats/kustomization.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
# clusters/atlas/flux-system/platform/nats/kustomization.yaml
|
||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||
kind: Kustomization
|
||||
metadata:
|
||||
name: nats
|
||||
namespace: flux-system
|
||||
spec:
|
||||
interval: 10m
|
||||
path: ./infrastructure/nats
|
||||
prune: true
|
||||
force: true
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
targetNamespace: nats
|
||||
healthChecks:
|
||||
- apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
name: nats
|
||||
namespace: nats
|
||||
wait: true
|
||||
@ -14,7 +14,6 @@ spec:
|
||||
name: flux-system
|
||||
targetNamespace: postgres
|
||||
dependsOn:
|
||||
- name: longhorn
|
||||
- name: vault
|
||||
- name: vault-csi
|
||||
healthChecks:
|
||||
|
||||
@ -1,12 +0,0 @@
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bash \
|
||||
ca-certificates \
|
||||
curl \
|
||||
util-linux \
|
||||
zstd \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
CMD ["/bin/sh"]
|
||||
@ -2,8 +2,4 @@ FROM python:3.11-slim
|
||||
|
||||
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
|
||||
RUN pip install --no-cache-dir requests psycopg2-binary \
|
||||
&& groupadd --system guest-tools \
|
||||
&& useradd --system --uid 65532 --gid guest-tools --home-dir /nonexistent --shell /usr/sbin/nologin guest-tools
|
||||
|
||||
USER guest-tools
|
||||
RUN pip install --no-cache-dir requests psycopg2-binary
|
||||
|
||||
@ -1,8 +1,16 @@
|
||||
# Use the mirrored Harbor artifact so CI does not depend on Docker Hub egress.
|
||||
FROM registry.bstein.dev/streaming/data-prepper@sha256:32ac6ad42e0f12da08bebee307e290b17d127b30def9b06eeaffbcbbc5033e83
|
||||
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
|
||||
|
||||
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
|
||||
|
||||
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
|
||||
|
||||
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
|
||||
&& mkdir -p /var/log/data-prepper
|
||||
|
||||
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
|
||||
|
||||
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
|
||||
|
||||
USER 10001
|
||||
WORKDIR /usr/share/data-prepper
|
||||
CMD ["bin/data-prepper"]
|
||||
|
||||
@ -1,13 +1,10 @@
|
||||
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates \
|
||||
&& addgroup -S livekit-token \
|
||||
&& adduser -S -D -H -u 65532 -G livekit-token livekit-token
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /lk-jwt-service /lk-jwt-service
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
USER livekit-token
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/lk-jwt-service"]
|
||||
|
||||
@ -29,12 +29,10 @@ FROM ${DEBIAN_IMAGE}
|
||||
RUN set -eux; \
|
||||
apt-get update; \
|
||||
apt-get install -y --no-install-recommends ca-certificates; \
|
||||
update-ca-certificates; rm -rf /var/lib/apt/lists/*; \
|
||||
groupadd --system p2pool; \
|
||||
useradd --system --uid 65532 --gid p2pool --home-dir /nonexistent --shell /usr/sbin/nologin p2pool
|
||||
update-ca-certificates; rm -rf /var/lib/apt/lists/*
|
||||
COPY --from=fetch /out/p2pool /usr/local/bin/p2pool
|
||||
|
||||
RUN /usr/local/bin/p2pool --version || true
|
||||
EXPOSE 3333
|
||||
USER p2pool
|
||||
ENTRYPOINT ["/usr/local/bin/p2pool"]
|
||||
|
||||
|
||||
@ -26,12 +26,9 @@ RUN set -eux; \
|
||||
curl -fsSL "$URL" -o /opt/monero/monero.tar.bz2; \
|
||||
tar -xjf /opt/monero/monero.tar.bz2 -C /opt/monero --strip-components=1; \
|
||||
install -m 0755 /opt/monero/monero-wallet-rpc /usr/local/bin/monero-wallet-rpc; \
|
||||
rm -f /opt/monero/monero.tar.bz2; \
|
||||
groupadd --system monero; \
|
||||
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero
|
||||
rm -f /opt/monero/monero.tar.bz2
|
||||
|
||||
ENV PATH="/usr/local/bin:/usr/bin:/bin"
|
||||
RUN /usr/local/bin/monero-wallet-rpc --version || true
|
||||
|
||||
EXPOSE 18083
|
||||
USER monero
|
||||
|
||||
@ -23,14 +23,10 @@ RUN set -eux; \
|
||||
mkdir -p /opt/monero; \
|
||||
tar -xjf /tmp/monero.tar.bz2 -C /opt/monero --strip-components=1; \
|
||||
rm -f /tmp/monero.tar.bz2; \
|
||||
groupadd --system monero; \
|
||||
useradd --system --uid 1000 --gid monero --home-dir /nonexistent --shell /usr/sbin/nologin monero; \
|
||||
mkdir -p /data; \
|
||||
chown monero:monero /data; \
|
||||
chmod 0770 /data
|
||||
|
||||
ENV LD_LIBRARY_PATH=/opt/monero:/opt/monero/lib \
|
||||
PATH="/opt/monero:${PATH}"
|
||||
|
||||
USER monero
|
||||
CMD ["/opt/monero/monerod", "--version"]
|
||||
|
||||
@ -1,13 +1,10 @@
|
||||
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates \
|
||||
&& addgroup -S oauth2-proxy \
|
||||
&& adduser -S -D -H -u 65532 -G oauth2-proxy oauth2-proxy
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
USER oauth2-proxy
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/bin/oauth2-proxy"]
|
||||
|
||||
@ -1,13 +1,10 @@
|
||||
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
|
||||
|
||||
FROM alpine:3.20
|
||||
RUN apk add --no-cache ca-certificates \
|
||||
&& addgroup -S pegasus \
|
||||
&& adduser -S -D -H -u 65532 -G pegasus pegasus
|
||||
RUN apk add --no-cache ca-certificates
|
||||
COPY --from=base /pegasus /pegasus
|
||||
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
|
||||
RUN chmod 0755 /entrypoint.sh
|
||||
|
||||
USER pegasus
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["/pegasus"]
|
||||
|
||||
@ -1,48 +0,0 @@
|
||||
# dockerfiles/Dockerfile.quality-tools
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
ARG SONAR_SCANNER_VERSION=8.0.1.6346
|
||||
ARG TRIVY_VERSION=0.70.0
|
||||
ENV TRIVY_CACHE_DIR=/opt/trivy-cache
|
||||
|
||||
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
bash \
|
||||
ca-certificates \
|
||||
curl \
|
||||
git \
|
||||
jq \
|
||||
unzip \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& groupadd --system quality-tools \
|
||||
&& useradd --system --uid 65532 --gid quality-tools --home-dir /nonexistent --shell /usr/sbin/nologin quality-tools
|
||||
|
||||
RUN set -eux; \
|
||||
scanner_zip="sonar-scanner-cli-${SONAR_SCANNER_VERSION}-linux-aarch64.zip"; \
|
||||
base_url="https://binaries.sonarsource.com/Distribution/sonar-scanner-cli"; \
|
||||
curl -fsSL "${base_url}/${scanner_zip}" -o "/tmp/${scanner_zip}"; \
|
||||
curl -fsSL "${base_url}/${scanner_zip}.sha256" -o "/tmp/${scanner_zip}.sha256"; \
|
||||
printf '%s %s\n' "$(cat "/tmp/${scanner_zip}.sha256")" "/tmp/${scanner_zip}" | sha256sum -c -; \
|
||||
unzip -q "/tmp/${scanner_zip}" -d /opt; \
|
||||
ln -s "/opt/sonar-scanner-${SONAR_SCANNER_VERSION}-linux-aarch64/bin/sonar-scanner" /usr/local/bin/sonar-scanner; \
|
||||
rm -f "/tmp/${scanner_zip}" "/tmp/${scanner_zip}.sha256"
|
||||
|
||||
RUN set -eux; \
|
||||
trivy_tgz="trivy_${TRIVY_VERSION}_Linux-ARM64.tar.gz"; \
|
||||
curl -fsSL "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/${trivy_tgz}" -o "/tmp/${trivy_tgz}"; \
|
||||
tar -C /usr/local/bin -xzf "/tmp/${trivy_tgz}" trivy; \
|
||||
rm -f "/tmp/${trivy_tgz}"; \
|
||||
trivy --version; \
|
||||
sonar-scanner -v
|
||||
|
||||
RUN set -eux; \
|
||||
mkdir -p "${TRIVY_CACHE_DIR}"; \
|
||||
trivy image --download-db-only --cache-dir "${TRIVY_CACHE_DIR}"; \
|
||||
chmod -R a+rX "${TRIVY_CACHE_DIR}"; \
|
||||
mkdir -p /workspace; \
|
||||
chown quality-tools:quality-tools /workspace
|
||||
|
||||
WORKDIR /workspace
|
||||
USER quality-tools
|
||||
3
dockerfiles/Dockerfile.synapse-admin-ensure
Normal file
3
dockerfiles/Dockerfile.synapse-admin-ensure
Normal file
@ -0,0 +1,3 @@
|
||||
FROM python:3.11-slim
|
||||
|
||||
RUN pip install --no-cache-dir psycopg2-binary bcrypt
|
||||
@ -27,42 +27,10 @@ spec:
|
||||
timeout: 10m
|
||||
values:
|
||||
installCRDs: true
|
||||
extraArgs:
|
||||
- --acme-http01-solver-nameservers=1.1.1.1:53,8.8.8.8:53
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: atlas.bstein.dev/spillover
|
||||
operator: DoesNotExist
|
||||
- weight: 95
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- weight: 50
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi4
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
@ -76,36 +44,6 @@ spec:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: atlas.bstein.dev/spillover
|
||||
operator: DoesNotExist
|
||||
- weight: 95
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- weight: 50
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi4
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
@ -119,36 +57,6 @@ spec:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: atlas.bstein.dev/spillover
|
||||
operator: DoesNotExist
|
||||
- weight: 95
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- weight: 50
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi4
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
|
||||
@ -26,7 +26,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: coredns
|
||||
image: registry.k8s.io/coredns/coredns:v1.12.1
|
||||
image: registry.bstein.dev/infra/coredns:1.12.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
args:
|
||||
- -conf
|
||||
|
||||
@ -6,6 +6,7 @@ resources:
|
||||
- ../modules/profiles/atlas-ha
|
||||
- coredns-custom.yaml
|
||||
- coredns-deployment.yaml
|
||||
- longhorn-node-taints.yaml
|
||||
- ntp-sync-daemonset.yaml
|
||||
- ../sources/cert-manager/letsencrypt.yaml
|
||||
- ../sources/cert-manager/letsencrypt-prod.yaml
|
||||
|
||||
40
infrastructure/core/longhorn-node-taints.yaml
Normal file
40
infrastructure/core/longhorn-node-taints.yaml
Normal file
@ -0,0 +1,40 @@
|
||||
# infrastructure/core/longhorn-node-taints.yaml
|
||||
apiVersion: v1
|
||||
kind: Node
|
||||
metadata:
|
||||
name: titan-13
|
||||
spec:
|
||||
taints:
|
||||
- key: longhorn
|
||||
value: "true"
|
||||
effect: PreferNoSchedule
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Node
|
||||
metadata:
|
||||
name: titan-15
|
||||
spec:
|
||||
taints:
|
||||
- key: longhorn
|
||||
value: "true"
|
||||
effect: PreferNoSchedule
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Node
|
||||
metadata:
|
||||
name: titan-17
|
||||
spec:
|
||||
taints:
|
||||
- key: longhorn
|
||||
value: "true"
|
||||
effect: PreferNoSchedule
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Node
|
||||
metadata:
|
||||
name: titan-19
|
||||
spec:
|
||||
taints:
|
||||
- key: longhorn
|
||||
value: "true"
|
||||
effect: PreferNoSchedule
|
||||
10
infrastructure/longhorn/core/backup-target.yaml
Normal file
10
infrastructure/longhorn/core/backup-target.yaml
Normal file
@ -0,0 +1,10 @@
|
||||
# infrastructure/longhorn/core/backup-target.yaml
|
||||
apiVersion: longhorn.io/v1beta2
|
||||
kind: BackupTarget
|
||||
metadata:
|
||||
name: default
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backupTargetURL: "s3://atlas-soteria@us-west-004/"
|
||||
credentialSecret: longhorn-backup-b2
|
||||
pollInterval: 5m0s
|
||||
@ -6,6 +6,39 @@ metadata:
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
interval: 30m
|
||||
postRenderers:
|
||||
- kustomize:
|
||||
patches:
|
||||
- target:
|
||||
kind: Service
|
||||
name: longhorn-conversion-webhook
|
||||
namespace: longhorn-system
|
||||
patch: |
|
||||
- op: add
|
||||
path: /spec/publishNotReadyAddresses
|
||||
value: true
|
||||
- target:
|
||||
kind: Service
|
||||
name: longhorn-admission-webhook
|
||||
namespace: longhorn-system
|
||||
patch: |
|
||||
- op: add
|
||||
path: /spec/publishNotReadyAddresses
|
||||
value: true
|
||||
- target:
|
||||
kind: DaemonSet
|
||||
name: longhorn-manager
|
||||
namespace: longhorn-system
|
||||
patch: |
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/readinessProbe/httpGet/path
|
||||
value: /v1/healthz
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/readinessProbe/httpGet/port
|
||||
value: 9500
|
||||
- op: replace
|
||||
path: /spec/template/spec/containers/0/readinessProbe/httpGet/scheme
|
||||
value: HTTP
|
||||
chart:
|
||||
spec:
|
||||
chart: longhorn
|
||||
@ -26,9 +59,6 @@ spec:
|
||||
cleanupOnFail: true
|
||||
timeout: 15m
|
||||
values:
|
||||
global:
|
||||
nodeSelector:
|
||||
longhorn-host: "true"
|
||||
service:
|
||||
ui:
|
||||
type: NodePort
|
||||
@ -37,7 +67,7 @@ spec:
|
||||
createSecret: false
|
||||
registrySecret: longhorn-registry
|
||||
image:
|
||||
pullPolicy: Always
|
||||
pullPolicy: IfNotPresent
|
||||
longhorn:
|
||||
engine:
|
||||
repository: registry.bstein.dev/infra/longhorn-engine
|
||||
@ -80,13 +110,4 @@ spec:
|
||||
repository: registry.bstein.dev/infra/longhorn-livenessprobe
|
||||
tag: v2.16.0
|
||||
defaultSettings:
|
||||
systemManagedPodsImagePullPolicy: Always
|
||||
longhornManager:
|
||||
nodeSelector:
|
||||
longhorn-host: "true"
|
||||
longhornDriver:
|
||||
nodeSelector:
|
||||
longhorn-host: "true"
|
||||
longhornUI:
|
||||
nodeSelector:
|
||||
longhorn-host: "true"
|
||||
systemManagedPodsImagePullPolicy: IfNotPresent
|
||||
|
||||
@ -6,17 +6,14 @@ resources:
|
||||
- vault-serviceaccount.yaml
|
||||
- secretproviderclass.yaml
|
||||
- vault-sync-deployment.yaml
|
||||
- backup-target.yaml
|
||||
- helmrelease.yaml
|
||||
- longhorn-settings-ensure-job.yaml
|
||||
- longhorn-disk-tags-ensure-job.yaml
|
||||
|
||||
configMapGenerator:
|
||||
- name: longhorn-settings-ensure-script
|
||||
files:
|
||||
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
files:
|
||||
- longhorn_disk_tags_ensure.py=scripts/longhorn_disk_tags_ensure.py
|
||||
|
||||
generatorOptions:
|
||||
disableNameSuffixHash: true
|
||||
|
||||
@ -1,36 +0,0 @@
|
||||
# infrastructure/longhorn/core/longhorn-disk-tags-ensure-job.yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: longhorn-disk-tags-ensure-1
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
ttlSecondsAfterFinished: 3600
|
||||
template:
|
||||
spec:
|
||||
serviceAccountName: longhorn-service-account
|
||||
restartPolicy: Never
|
||||
volumes:
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
configMap:
|
||||
name: longhorn-disk-tags-ensure-script
|
||||
defaultMode: 0555
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/arch
|
||||
operator: In
|
||||
values: ["arm64"]
|
||||
- key: node-role.kubernetes.io/worker
|
||||
operator: Exists
|
||||
containers:
|
||||
- name: apply
|
||||
image: python:3.12.9-alpine3.20
|
||||
command: ["python", "/scripts/longhorn_disk_tags_ensure.py"]
|
||||
volumeMounts:
|
||||
- name: longhorn-disk-tags-ensure-script
|
||||
mountPath: /scripts
|
||||
readOnly: true
|
||||
@ -2,11 +2,10 @@
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: longhorn-settings-ensure-7
|
||||
name: longhorn-settings-ensure-4
|
||||
namespace: longhorn-system
|
||||
spec:
|
||||
backoffLimit: 0
|
||||
activeDeadlineSeconds: 240
|
||||
ttlSecondsAfterFinished: 3600
|
||||
template:
|
||||
spec:
|
||||
|
||||
@ -1,100 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Reconcile Longhorn disk tags for the Titan longhorn storage classes.
|
||||
|
||||
The astreae/asteria storageclasses select Longhorn disks by tag. The current
|
||||
nodes already have the right disk paths, but the tag fields can drift to empty
|
||||
after node recovery. This job patches the live Longhorn Node CRs back to the
|
||||
expected tags so PVC provisioning keeps working.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import ssl
|
||||
import urllib.request
|
||||
|
||||
|
||||
LONGHORN_NS = "longhorn-system"
|
||||
LONGHORN_API = "/apis/longhorn.io/v1beta2/namespaces/{namespace}/nodes"
|
||||
DESIRED_TAGS = {
|
||||
"/mnt/astreae": "astreae",
|
||||
"/mnt/asteria": "asteria",
|
||||
}
|
||||
|
||||
|
||||
def api_base() -> str:
    """Return the base URL of the in-cluster Kubernetes API server.

    The host is read from ``KUBERNETES_SERVICE_HOST`` and the port from
    ``KUBERNETES_SERVICE_PORT`` (defaulting to ``"443"`` when unset).

    Returns:
        A URL of the form ``https://<host>:<port>`` with no trailing slash.

    Raises:
        SystemExit: if ``KUBERNETES_SERVICE_HOST`` is unset or empty.
    """
    host = os.environ.get("KUBERNETES_SERVICE_HOST")
    port = os.environ.get("KUBERNETES_SERVICE_PORT", "443")
    if not host:
        raise SystemExit("missing KUBERNETES_SERVICE_HOST")
    # A bare IPv6 literal contains ':' and must be wrapped in brackets inside
    # a URL authority, otherwise the port separator becomes ambiguous.
    if ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return f"https://{host}:{port}"
|
||||
|
||||
|
||||
def token(path: str = "/var/run/secrets/kubernetes.io/serviceaccount/token") -> str:
    """Read a bearer token from a file and return it without surrounding whitespace.

    Args:
        path: File to read the token from. Defaults to the path this script
            has always used, so existing zero-argument callers are unaffected.

    Returns:
        The file's contents, decoded as UTF-8 and stripped.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path, "r", encoding="utf-8") as fh:
        return fh.read().strip()
|
||||
|
||||
|
||||
def ca_context() -> ssl.SSLContext:
    """Build a default SSL context that trusts the CA bundle at the fixed path below."""
    return ssl.create_default_context(
        cafile="/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
    )
|
||||
|
||||
|
||||
def request_json(method: str, path: str, body: dict | None = None) -> dict:
    """Send an authenticated request to the API server and decode the JSON reply.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"PATCH"``.
        path: Absolute API path appended to ``api_base()``.
        body: Optional JSON-serialisable payload. When given it is sent with
            ``Content-Type: application/merge-patch+json``.

    Returns:
        The decoded JSON response, or ``{}`` when the response body is empty.

    Raises:
        urllib.error.HTTPError: on a non-2xx response. The response body is
            written to stderr first so the failure reason is visible in logs.
        urllib.error.URLError: on connection-level failures.
    """
    import sys
    import urllib.error

    url = f"{api_base()}{path}"
    headers = {
        "Authorization": f"Bearer {token()}",
        "Accept": "application/json",
    }
    data = None
    if body is not None:
        # Only declare a content type when there is actually a payload;
        # a body-less GET has nothing for the header to describe.
        headers["Content-Type"] = "application/merge-patch+json"
        data = json.dumps(body).encode("utf-8")

    req = urllib.request.Request(url, method=method, headers=headers, data=data)
    try:
        with urllib.request.urlopen(req, context=ca_context(), timeout=20) as resp:
            payload = resp.read()
    except urllib.error.HTTPError as exc:
        # Surface the server's error body before propagating the same exception.
        detail = exc.read().decode("utf-8", errors="replace")
        print(f"{method} {path} failed: HTTP {exc.code}: {detail}", file=sys.stderr)
        raise
    return json.loads(payload) if payload else {}
|
||||
|
||||
|
||||
def list_nodes() -> list[dict]:
    """GET the Longhorn node collection for LONGHORN_NS and return its ``items``.

    Returns an empty list when the response has no ``items`` key.
    """
    collection_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    response = request_json("GET", collection_path)
    return response.get("items", [])
|
||||
|
||||
|
||||
def patch_disk_tags(node_name: str, disk_name: str, desired_tag: str) -> None:
    """PATCH one disk of one Longhorn node so its ``tags`` is ``[desired_tag]``.

    Args:
        node_name: Name of the Longhorn Node resource to patch.
        disk_name: Key of the disk under the node's ``spec.disks``.
        desired_tag: The single tag the disk should carry afterwards.
    """
    merge_patch = {"spec": {"disks": {disk_name: {"tags": [desired_tag]}}}}
    nodes_path = LONGHORN_API.format(namespace=LONGHORN_NS)
    request_json("PATCH", f"{nodes_path}/{node_name}", body=merge_patch)
|
||||
|
||||
|
||||
def main() -> int:
    """Walk every Longhorn node and reset drifted disk tags.

    For each disk whose ``path`` appears in DESIRED_TAGS, the disk's tags are
    compared with the single expected tag and patched when they differ.
    Disks at other paths are ignored. Prints one line per patch and a final
    summary.

    Returns:
        Always 0; failures surface as exceptions from the API helpers.
    """
    changed = 0
    skipped = 0

    for node in list_nodes():
        name = node.get("metadata", {}).get("name", "")
        disks = node.get("spec", {}).get("disks", {}) or {}
        for disk_name, disk in disks.items():
            path = disk.get("path")
            wanted = DESIRED_TAGS.get(path)
            if not wanted:
                # Not one of the managed mount points.
                continue

            target = [wanted]
            have = disk.get("tags") or []
            if have == target:
                skipped += 1
                continue

            print(f"patching {name}:{disk_name} path={path} tags={have!r} -> {target!r}")
            patch_disk_tags(name, disk_name, wanted)
            changed += 1

    print(f"done: changed={changed} skipped={skipped}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
@ -4,12 +4,11 @@ set -eu
|
||||
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
|
||||
|
||||
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
|
||||
curl_opts="-fsS --connect-timeout 3 --max-time 15"
|
||||
|
||||
wait_for_api() {
|
||||
attempts=30
|
||||
while [ "${attempts}" -gt 0 ]; do
|
||||
if curl ${curl_opts} "${api_base}" >/dev/null 2>&1; then
|
||||
if curl -fsS "${api_base}" >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
attempts=$((attempts - 1))
|
||||
@ -23,14 +22,14 @@ update_setting() {
|
||||
name="$1"
|
||||
value="$2"
|
||||
|
||||
current="$(curl ${curl_opts} "${api_base}/${name}" || true)"
|
||||
current="$(curl -fsS "${api_base}/${name}" || true)"
|
||||
if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then
|
||||
echo "Setting ${name} already set."
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "Setting ${name} -> ${value}"
|
||||
curl ${curl_opts} -X PUT \
|
||||
curl -fsS -X PUT \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"value\":\"${value}\"}" \
|
||||
"${api_base}/${name}" >/dev/null
|
||||
@ -41,7 +40,3 @@ update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v
|
||||
update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2"
|
||||
update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2"
|
||||
update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56"
|
||||
# Keep storage-heavy nodes from getting hammered by rebuild storms and skew.
|
||||
update_setting replica-auto-balance "best-effort"
|
||||
update_setting concurrent-replica-rebuild-per-node-limit "2"
|
||||
update_setting node-down-pod-deletion-policy "delete-both-statefulset-and-deployment-pod"
|
||||
|
||||
@ -13,13 +13,13 @@ spec:
|
||||
- objectName: "harbor-pull__dockerconfigjson"
|
||||
secretPath: "kv/data/atlas/shared/harbor-pull"
|
||||
secretKey: "dockerconfigjson"
|
||||
- objectName: "longhorn-backup-b2__AWS_ACCESS_KEY_ID"
|
||||
- objectName: "longhorn_backup__AWS_ACCESS_KEY_ID"
|
||||
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||
secretKey: "AWS_ACCESS_KEY_ID"
|
||||
- objectName: "longhorn-backup-b2__AWS_SECRET_ACCESS_KEY"
|
||||
- objectName: "longhorn_backup__AWS_SECRET_ACCESS_KEY"
|
||||
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||
secretKey: "AWS_SECRET_ACCESS_KEY"
|
||||
- objectName: "longhorn-backup-b2__AWS_ENDPOINTS"
|
||||
- objectName: "longhorn_backup__AWS_ENDPOINTS"
|
||||
secretPath: "kv/data/atlas/longhorn/backup-b2"
|
||||
secretKey: "AWS_ENDPOINTS"
|
||||
secretObjects:
|
||||
@ -31,9 +31,9 @@ spec:
|
||||
- secretName: longhorn-backup-b2
|
||||
type: Opaque
|
||||
data:
|
||||
- objectName: longhorn-backup-b2__AWS_ACCESS_KEY_ID
|
||||
- objectName: longhorn_backup__AWS_ACCESS_KEY_ID
|
||||
key: AWS_ACCESS_KEY_ID
|
||||
- objectName: longhorn-backup-b2__AWS_SECRET_ACCESS_KEY
|
||||
- objectName: longhorn_backup__AWS_SECRET_ACCESS_KEY
|
||||
key: AWS_SECRET_ACCESS_KEY
|
||||
- objectName: longhorn-backup-b2__AWS_ENDPOINTS
|
||||
- objectName: longhorn_backup__AWS_ENDPOINTS
|
||||
key: AWS_ENDPOINTS
|
||||
|
||||
@ -26,16 +26,6 @@ spec:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values: ["rpi5", "rpi4"]
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
containers:
|
||||
- name: sync
|
||||
image: alpine:3.20
|
||||
|
||||
@ -78,7 +78,6 @@ spec:
|
||||
- --upstream=http://longhorn-frontend.longhorn-system.svc.cluster.local
|
||||
- --http-address=0.0.0.0:4180
|
||||
- --skip-provider-button=true
|
||||
- --approval-prompt=auto
|
||||
- --skip-jwt-bearer-tokens=true
|
||||
- --oidc-groups-claim=groups
|
||||
- --cookie-domain=longhorn.bstein.dev
|
||||
|
||||
17
infrastructure/nats/configmap.yaml
Normal file
17
infrastructure/nats/configmap.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: nats-config
|
||||
namespace: nats
|
||||
labels:
|
||||
app: nats
|
||||
component: config
|
||||
annotations:
|
||||
description: "NATS JetStream configuration"
|
||||
data:
|
||||
nats.conf: |
|
||||
jetstream {
|
||||
store_dir: /data
|
||||
max_mem_store: 128MB
|
||||
max_file_store: 1GB
|
||||
}
|
||||
7
infrastructure/nats/kustomization.yaml
Normal file
7
infrastructure/nats/kustomization.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- configmap.yaml
|
||||
- service.yaml
|
||||
- statefulset.yaml
|
||||
4
infrastructure/nats/namespace.yaml
Normal file
4
infrastructure/nats/namespace.yaml
Normal file
@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: nats
|
||||
17
infrastructure/nats/service.yaml
Normal file
17
infrastructure/nats/service.yaml
Normal file
@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: nats
|
||||
namespace: nats
|
||||
labels:
|
||||
app: nats
|
||||
spec:
|
||||
selector:
|
||||
app: nats
|
||||
ports:
|
||||
- name: client
|
||||
port: 4222
|
||||
targetPort: 4222
|
||||
- name: monitoring
|
||||
port: 8222
|
||||
targetPort: 8222
|
||||
54
infrastructure/nats/statefulset.yaml
Normal file
54
infrastructure/nats/statefulset.yaml
Normal file
@ -0,0 +1,54 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
name: nats
|
||||
namespace: nats
|
||||
labels:
|
||||
app: nats
|
||||
spec:
|
||||
serviceName: nats
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: nats
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: nats
|
||||
spec:
|
||||
containers:
|
||||
- name: nats
|
||||
image: nats:2.10.18
|
||||
args:
|
||||
- "-c"
|
||||
- "/etc/nats/nats.conf"
|
||||
ports:
|
||||
- name: client
|
||||
containerPort: 4222
|
||||
- name: monitoring
|
||||
containerPort: 8222
|
||||
volumeMounts:
|
||||
- name: config
|
||||
mountPath: /etc/nats
|
||||
- name: data
|
||||
mountPath: /data
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
volumes:
|
||||
- name: config
|
||||
configMap:
|
||||
name: nats-config
|
||||
volumeClaimTemplates:
|
||||
- metadata:
|
||||
name: data
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 2Gi
|
||||
@ -25,7 +25,6 @@ spec:
|
||||
serviceAccountName: postgres-vault
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
hardware: rpi5
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
@ -36,17 +35,7 @@ spec:
|
||||
values: ["true"]
|
||||
- key: hardware
|
||||
operator: In
|
||||
values: ["rpi5"]
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values: ["titan-06"]
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values: ["titan-05", "titan-07", "titan-08", "titan-11"]
|
||||
values: ["rpi4", "rpi5"]
|
||||
containers:
|
||||
- name: postgres
|
||||
image: postgres:15
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: atlas-traefik-ingress-controller
|
||||
name: traefik-ingress-controller
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
|
||||
@ -2,12 +2,12 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: atlas-traefik-ingress-controller
|
||||
name: traefik-ingress-controller
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: atlas-traefik-ingress-controller
|
||||
name: traefik-ingress-controller
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: atlas-traefik-ingress-controller
|
||||
name: traefik-ingress-controller
|
||||
namespace: traefik
|
||||
|
||||
@ -70,42 +70,10 @@ items:
|
||||
dnsPolicy: ClusterFirst
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: atlas.bstein.dev/spillover
|
||||
operator: DoesNotExist
|
||||
- weight: 95
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values:
|
||||
- titan-13
|
||||
- titan-15
|
||||
- titan-17
|
||||
- titan-19
|
||||
- weight: 90
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi5
|
||||
- weight: 50
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: hardware
|
||||
operator: In
|
||||
values:
|
||||
- rpi4
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
serviceAccount: atlas-traefik-ingress-controller
|
||||
serviceAccountName: atlas-traefik-ingress-controller
|
||||
serviceAccount: traefik-ingress-controller
|
||||
serviceAccountName: traefik-ingress-controller
|
||||
terminationGracePeriodSeconds: 30
|
||||
kind: List
|
||||
metadata: {}
|
||||
|
||||
@ -1,9 +0,0 @@
|
||||
# infrastructure/traefik/ingressclass.yaml
|
||||
apiVersion: networking.k8s.io/v1
|
||||
kind: IngressClass
|
||||
metadata:
|
||||
name: traefik
|
||||
annotations:
|
||||
ingressclass.kubernetes.io/is-default-class: "true"
|
||||
spec:
|
||||
controller: traefik.io/ingress-controller
|
||||
@ -6,7 +6,6 @@ metadata:
|
||||
namespace: flux-system
|
||||
resources:
|
||||
- crds.yaml
|
||||
- ingressclass.yaml
|
||||
- deployment.yaml
|
||||
- serviceaccount.yaml
|
||||
- clusterrole.yaml
|
||||
|
||||
@ -2,5 +2,5 @@
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: atlas-traefik-ingress-controller
|
||||
name: traefik-ingress-controller
|
||||
namespace: traefik
|
||||
|
||||
@ -41,12 +41,3 @@ spec:
|
||||
failurePolicy: Ignore
|
||||
nodeSelector:
|
||||
node-role.kubernetes.io/worker: "true"
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
preference:
|
||||
matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: NotIn
|
||||
values: ["titan-13", "titan-15", "titan-17", "titan-19"]
|
||||
|
||||
@ -1,152 +0,0 @@
|
||||
Atlas Cluster Power Recovery (Graceful Shutdown/Startup)
|
||||
|
||||
Purpose
|
||||
- Provide a safe operator flow for planned power events and cold-boot recovery.
|
||||
- Avoid the Flux/Gitea bootstrap deadlock by using a local bootstrap fallback path.
|
||||
- Break the Harbor self-hosting deadlock by seeding Harbor runtime images from a control-host bundle.
|
||||
- Refuse bootstrap when UPS charge is too low, and fall back to fast shutdown if a second outage hits mid-recovery.
|
||||
|
||||
Bootstrapping risk to remember
|
||||
- Flux source is Git over SSH to `scm.bstein.dev` (Gitea).
|
||||
- Gitea itself is a Flux-managed workload and depends on storage + database.
|
||||
- Harbor is also critical, but it is not part of the first recovery stage because Harbor serves its own runtime images.
|
||||
- On cold boot, if Flux cannot fetch source before Gitea is up, reconciliation can stall.
|
||||
- Recovery path: bring control plane and workers up, then locally apply minimal platform stack (`core -> helm -> longhorn -> metallb -> traefik -> vault-csi -> vault-injector -> vault -> postgres -> gitea`), then seed Harbor images onto the Harbor node from a control-host bundle, then resume/reconcile Flux. Harbor is a later recovery stage after storage, Vault, Postgres, and Gitea are back.
|
||||
|
||||
Script
|
||||
- `scripts/cluster_power_recovery.sh`
|
||||
- `scripts/cluster_power_console.sh`
|
||||
- Modes:
|
||||
- `prepare`
|
||||
- `shutdown`
|
||||
- `harbor-seed`
|
||||
- `startup`
|
||||
- `status`
|
||||
- Default is dry-run. Add `--execute` to actually perform actions.
|
||||
|
||||
Dry-run examples
|
||||
- Shutdown preview:
|
||||
- `scripts/cluster_power_recovery.sh shutdown --skip-etcd-snapshot --skip-drain`
|
||||
- Startup preview:
|
||||
- `scripts/cluster_power_recovery.sh startup`
|
||||
- Harbor seed preview:
|
||||
- `scripts/cluster_power_recovery.sh harbor-seed`
|
||||
|
||||
Execute examples
|
||||
- Prepare helper image on every node:
|
||||
- `scripts/cluster_power_recovery.sh prepare --execute`
|
||||
- Seed Harbor runtime images onto `titan-05` from the control-host bundle:
|
||||
- `scripts/cluster_power_recovery.sh harbor-seed --execute`
|
||||
- Planned shutdown:
|
||||
- `scripts/cluster_power_recovery.sh shutdown --execute`
|
||||
- Planned startup (canonical branch):
|
||||
- `scripts/cluster_power_recovery.sh startup --execute --force-flux-branch main`
|
||||
|
||||
Manual remote console examples
|
||||
- Canonical operator hosts:
|
||||
- `titan-db`
|
||||
- `tethys` (`titan-24`)
|
||||
- Both hosts now have:
|
||||
- `~/ananke-tools/cluster_power_recovery.sh`
|
||||
- `~/ananke-tools/cluster_power_console.sh`
|
||||
- `~/ananke-tools/bootstrap/recovery-config.env`
|
||||
- `~/ananke-tools/bootstrap/harbor-bootstrap-images.txt`
|
||||
- `~/ananke-tools/kubeconfig`
|
||||
- `~/ananke-cluster-power`
|
||||
- `~/bin/ananke-cluster-power`
|
||||
- `~/ananke-repo/{infrastructure,services,scripts}`
|
||||
- Both hosts also keep the Harbor bootstrap bundle at:
|
||||
- `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
|
||||
- Remote usage:
|
||||
- `ssh titan-db`
|
||||
- `~/ananke-cluster-power status`
|
||||
- `~/ananke-cluster-power prepare --execute`
|
||||
- `~/ananke-cluster-power shutdown --execute`
|
||||
- `~/ananke-cluster-power startup --execute --force-flux-branch main`
|
||||
- `ssh tethys`
|
||||
- `~/ananke-cluster-power status`
|
||||
- `~/ananke-cluster-power prepare --execute`
|
||||
- `~/ananke-cluster-power shutdown --execute`
|
||||
- `~/ananke-cluster-power startup --execute --force-flux-branch main`
|
||||
|
||||
Useful options
|
||||
- `--shutdown-mode host-poweroff|cluster-only`
|
||||
- `--expected-flux-branch main`
|
||||
- `--expected-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
|
||||
- `--force-flux-url ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git`
|
||||
- `--force-flux-branch main`
|
||||
- `--allow-flux-source-mutation` (required with `--force-flux-url`; breakglass only)
|
||||
- `--skip-local-bootstrap` (not recommended for cold-start recovery)
|
||||
- `--skip-harbor-bootstrap` (skip the Harbor recovery stage if you know Harbor should stay deferred)
|
||||
- `--skip-harbor-seed` (skip bundle import if Harbor images are already cached on the target node)
|
||||
- `--skip-helper-prewarm`
|
||||
- `--min-startup-battery 35`
|
||||
- `--ups-host pyrphoros@localhost`
|
||||
- `--require-ups-battery`
|
||||
- `--drain-timeout 180`
|
||||
- `--emergency-drain-timeout 45`
|
||||
- `--flux-ready-timeout 1200`
|
||||
- `--startup-checklist-timeout 900`
|
||||
- `--startup-stability-window 180`
|
||||
- `--startup-stability-timeout 900`
|
||||
- `--recovery-state-file ~/.local/share/ananke/cluster_power_recovery.state`
|
||||
- `--harbor-bundle-file ~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`
|
||||
|
||||
Controlled drill checklist (recommended)
|
||||
- Operator host: use `titan-db` as canonical control host for the drill.
|
||||
- On-site coordination:
|
||||
- Have on-site operator ready before shutdown starts.
|
||||
- Confirm they will manually power cluster nodes back on after shutdown completes.
|
||||
- Confirm who will announce "all nodes powered on" to resume startup.
|
||||
- Preflight on `titan-db`:
|
||||
- `mkdir -p ~/ananke-logs`
|
||||
- `~/ananke-cluster-power status` and verify:
|
||||
- `ups_host=pyrphoros@localhost`
|
||||
- `ups_battery` is numeric
|
||||
- `flux_source_ready=True`
|
||||
- Warm helper image just before shutdown:
|
||||
- `~/ananke-cluster-power prepare --execute`
|
||||
- Run in a persistent shell and capture logs:
|
||||
- `tmux new -s ananke-drill`
|
||||
- `script -q -a ~/ananke-logs/ananke-drill-$(date +%Y%m%d-%H%M%S).log`
|
||||
- Execute controlled shutdown with telemetry enforcement:
|
||||
- `~/ananke-cluster-power shutdown --execute --require-ups-battery`
|
||||
- After on-site power-on confirmation, execute startup:
|
||||
- `~/ananke-cluster-power startup --execute --force-flux-branch main --require-ups-battery`
|
||||
- Post-check:
|
||||
- `~/ananke-cluster-power status`
|
||||
- Verify critical services (`longhorn`, `vault`, `postgres`, `gitea`, `harbor`, `pegasus`) and no widespread pull/crash failures.
|
||||
|
||||
Operational notes
|
||||
- The flow suspends Flux Kustomizations/HelmReleases during shutdown to prevent churn.
|
||||
- Shutdown behavior is explicit:
|
||||
- `host-poweroff` schedules host poweroff after service stop.
|
||||
- `cluster-only` stops `k3s`/`k3s-agent` without powering hosts off.
|
||||
- Worker drain is no longer best-effort only. The script now escalates from normal drain, to `--force`, to `--disable-eviction` once the configured timeout is exhausted.
|
||||
- Startup fails fast if Flux source URL/branch drift from expected values (unless branch override is explicitly requested with `--force-flux-branch`).
|
||||
- Flux desired-state source remains `titan-iac.git`. Ananke orchestrates runtime recovery and should not be used as the normal Flux source repo.
|
||||
- During startup, if Flux source is not `Ready`, local bootstrap fallback is applied first using the repo snapshot under `~/ananke-repo`.
|
||||
- Longhorn is reconciled before Vault/Postgres/Gitea so storage-backed services are not racing the volume layer.
|
||||
- Harbor is reconciled after the first critical stateful services.
|
||||
- Harbor bootstrap is now designed around a control-host bundle:
|
||||
- Build the Harbor bundle locally with `scripts/build_harbor_bootstrap_bundle.sh`.
|
||||
- Stage it on the operator host at `~/.local/share/ananke/bundles/harbor-bootstrap-v2.14.1-arm64.tar.zst`.
|
||||
- Use `harbor-seed --execute` or a full `startup --execute` to stream/import that bundle onto `titan-05`.
|
||||
- The Harbor bundle remains arm64-only because Harbor is pinned to arm64 nodes. The node-helper image is multi-arch because Ananke uses it across both arm64 and amd64 nodes during prepare/shutdown operations.
|
||||
- Ananke uses a temporary privileged helper pod for host-side operations. The helper image is prewarmed with `prepare --execute` so later shutdown/startup steps do not stall on image pulls.
|
||||
- The script persists outage state in `~/.local/share/ananke/cluster_power_recovery.state` by default. If startup is attempted during an outage window and power becomes unstable again, rerunning startup with insufficient UPS charge will flip into the emergency shutdown path instead of continuing to bootstrap.
|
||||
- Startup completion is strict now:
|
||||
- all non-optional Flux kustomizations must be `Ready=True`
|
||||
- external service checklist must pass (defaults include Gitea, Grafana, Harbor)
|
||||
- generated ingress reachability checks must pass (default accepted codes: `200,301,302,307,308,401,403,404`)
|
||||
- stability soak must pass with no crashloop/pull-failure churn
|
||||
- If Flux hits immutable one-off Job drift during reconcile, Ananke now attempts self-heal by pruning failed Flux-managed Jobs and retrying reconcile.
|
||||
- In dry-run mode, the script now skips the live API wait step so preview runs do not stall on an offline cluster.
|
||||
- Dry-run mode no longer mutates outage recovery state.
|
||||
- `harbor-seed --execute` was validated by:
|
||||
- prewarming the helper image across all nodes
|
||||
- streaming the Harbor bootstrap bundle to `titan-05`
|
||||
- importing Harbor runtime images into host `containerd`
|
||||
- successfully running a Harbor-backed canary pod (`harbor-canary-ok`)
|
||||
- After bootstrap, Flux resources are resumed and reconciled.
|
||||
- Keep this runbook aligned with `clusters/atlas/flux-system/gotk-sync.yaml`.
|
||||
@ -1,3 +0,0 @@
|
||||
[pytest]
|
||||
addopts = -ra
|
||||
norecursedirs = .git .venv .venv-ci __pycache__ tmp
|
||||
@ -1,9 +0,0 @@
|
||||
# Harbor cold-start bootstrap images.
|
||||
registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-portal:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-redis:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-nginx:v2.14.1-arm64
|
||||
registry.bstein.dev/infra/harbor-prepare:v2.14.1-arm64
|
||||
@ -1,36 +0,0 @@
|
||||
CANONICAL_CONTROL_HOST="titan-db"
|
||||
DEFAULT_FLUX_BRANCH="main"
|
||||
EXPECTED_FLUX_URL="ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git"
|
||||
SHUTDOWN_MODE="host-poweroff"
|
||||
STATE_SUBDIR=".local/share/ananke"
|
||||
HARBOR_BUNDLE_BASENAME="harbor-bootstrap-v2.14.1-arm64.tar.zst"
|
||||
HARBOR_TARGET_NODE=""
|
||||
HARBOR_CANARY_NODE=""
|
||||
HARBOR_HOST_LABEL_KEY="ananke.bstein.dev/harbor-bootstrap"
|
||||
HARBOR_CANARY_IMAGE="registry.bstein.dev/bstein/kubectl:1.35.0"
|
||||
NODE_HELPER_IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
|
||||
NODE_HELPER_NAMESPACE="maintenance"
|
||||
NODE_HELPER_SERVICE_ACCOUNT="default"
|
||||
REGISTRY_PULL_SECRET="harbor-regcred"
|
||||
BUNDLE_HTTP_PORT="8877"
|
||||
UPS_HOST="pyrphoros@localhost"
|
||||
UPS_BATTERY_KEY="battery.charge"
|
||||
FLUX_READY_TIMEOUT_SECONDS="1200"
|
||||
FLUX_READY_POLL_SECONDS="10"
|
||||
STARTUP_CHECKLIST_TIMEOUT_SECONDS="900"
|
||||
STARTUP_CHECKLIST_POLL_SECONDS="10"
|
||||
STARTUP_WORKLOAD_TIMEOUT_SECONDS="900"
|
||||
STARTUP_WORKLOAD_POLL_SECONDS="10"
|
||||
STARTUP_STABILITY_WINDOW_SECONDS="180"
|
||||
STARTUP_STABILITY_TIMEOUT_SECONDS="900"
|
||||
STARTUP_STABILITY_POLL_SECONDS="10"
|
||||
STARTUP_OPTIONAL_KUSTOMIZATIONS=""
|
||||
STARTUP_IGNORE_PODS_REGEX=""
|
||||
STARTUP_IGNORE_WORKLOADS_REGEX=""
|
||||
STARTUP_WORKLOAD_NAMESPACE_EXCLUDES_REGEX="^(kube-system|kube-public|kube-node-lease|flux-system)$"
|
||||
STARTUP_SERVICE_CHECK_TIMEOUT_SECONDS="10"
|
||||
STARTUP_INCLUDE_INGRESS_CHECKS="1"
|
||||
STARTUP_INGRESS_ALLOWED_STATUSES="200,301,302,307,308,401,403,404"
|
||||
STARTUP_IGNORE_INGRESS_HOSTS_REGEX=""
|
||||
STARTUP_INGRESS_CHECK_TIMEOUT_SECONDS="10"
|
||||
STARTUP_SERVICE_CHECKLIST='gitea|https://scm.bstein.dev/api/healthz|200|"status":"pass"||;grafana|https://metrics.bstein.dev/api/health|200|"database":"ok"||;harbor|https://registry.bstein.dev/v2/|200,401|||'
|
||||
@ -1,56 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
IMAGE="registry.bstein.dev/bstein/ananke-node-helper:0.1.0"
|
||||
DOCKER_CONFIG_PATH=""
|
||||
PLATFORMS="linux/amd64,linux/arm64"
|
||||
BUILDER_NAME="ananke-node-helper-builder"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--image)
|
||||
IMAGE="${2:?missing image}"
|
||||
shift 2
|
||||
;;
|
||||
--docker-config)
|
||||
DOCKER_CONFIG_PATH="${2:?missing docker config path}"
|
||||
shift 2
|
||||
;;
|
||||
--platforms)
|
||||
PLATFORMS="${2:?missing platforms}"
|
||||
shift 2
|
||||
;;
|
||||
--builder)
|
||||
BUILDER_NAME="${2:?missing builder}"
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
cat <<USAGE
|
||||
Usage: scripts/build_ananke_node_helper.sh [--image <image>] [--docker-config <path>] [--platforms <csv>] [--builder <name>]
|
||||
USAGE
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ -n "${DOCKER_CONFIG_PATH}" ]]; then
|
||||
export DOCKER_CONFIG="${DOCKER_CONFIG_PATH}"
|
||||
fi
|
||||
|
||||
if ! docker buildx inspect "${BUILDER_NAME}" >/dev/null 2>&1; then
|
||||
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --use >/dev/null
|
||||
else
|
||||
docker buildx use "${BUILDER_NAME}" >/dev/null
|
||||
fi
|
||||
|
||||
docker buildx inspect --bootstrap >/dev/null
|
||||
docker buildx build \
|
||||
--platform "${PLATFORMS}" \
|
||||
-f dockerfiles/Dockerfile.ananke-node-helper \
|
||||
-t "${IMAGE}" \
|
||||
--push \
|
||||
.
|
||||
@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
IMAGES_FILE="scripts/bootstrap/harbor-bootstrap-images.txt"
|
||||
BUNDLE_FILE="artifacts/harbor-bootstrap-v2.14.1-arm64.tar.zst"
|
||||
DOCKER_CONFIG_PATH=""
|
||||
PLATFORM="linux/arm64"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--images-file)
|
||||
IMAGES_FILE="${2:?missing images file}"
|
||||
shift 2
|
||||
;;
|
||||
--bundle-file)
|
||||
BUNDLE_FILE="${2:?missing bundle file}"
|
||||
shift 2
|
||||
;;
|
||||
--docker-config)
|
||||
DOCKER_CONFIG_PATH="${2:?missing docker config path}"
|
||||
shift 2
|
||||
;;
|
||||
--platform)
|
||||
PLATFORM="${2:?missing platform}"
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
cat <<USAGE
|
||||
Usage: scripts/build_harbor_bootstrap_bundle.sh [--images-file <path>] [--bundle-file <path>] [--docker-config <path>] [--platform <linux/arm64>]
|
||||
USAGE
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
echo "Unknown option: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ -n "${DOCKER_CONFIG_PATH}" ]]; then
|
||||
export DOCKER_CONFIG="${DOCKER_CONFIG_PATH}"
|
||||
fi
|
||||
|
||||
mapfile -t IMAGES < <(grep -v '^[[:space:]]*#' "${IMAGES_FILE}" | sed '/^[[:space:]]*$/d')
|
||||
if [[ ${#IMAGES[@]} -eq 0 ]]; then
|
||||
echo "No images found in ${IMAGES_FILE}" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$(dirname "${BUNDLE_FILE}")"
|
||||
for image in "${IMAGES[@]}"; do
|
||||
echo "Pulling ${image}" >&2
|
||||
docker pull --platform "${PLATFORM}" "${image}" >/dev/null
|
||||
|
||||
done
|
||||
|
||||
docker save "${IMAGES[@]}" | zstd -T0 -19 -o "${BUNDLE_FILE}"
|
||||
echo "Wrote ${BUNDLE_FILE}" >&2
|
||||
@ -1,87 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
|
||||
usage() {
|
||||
cat <<'USAGE'
|
||||
Usage:
|
||||
scripts/cluster_power_console.sh [--repo-dir <path>] [--delegate-host <host>] <shutdown|startup> [recovery-script-options...]
|
||||
|
||||
Purpose:
|
||||
Friendly manual entrypoint for running Ananke from a remote console.
|
||||
Canonical control host is titan-db by default so bundle/state handling stays in one place.
|
||||
|
||||
Defaults:
|
||||
--repo-dir $HOME/Development/ananke (fallback: $HOME/Development/titan-iac)
|
||||
--delegate-host titan-db
|
||||
|
||||
Examples:
|
||||
scripts/cluster_power_console.sh shutdown --execute
|
||||
scripts/cluster_power_console.sh startup --execute --force-flux-branch main
|
||||
scripts/cluster_power_console.sh --delegate-host titan-24 shutdown --execute
|
||||
USAGE
|
||||
}
|
||||
|
||||
if [[ -d "${HOME}/Development/ananke" ]]; then
|
||||
REPO_DIR="${HOME}/Development/ananke"
|
||||
else
|
||||
REPO_DIR="${HOME}/Development/titan-iac"
|
||||
fi
|
||||
DELEGATE_HOST="titan-db"
|
||||
REMOTE_REPO_DIR="${ANANKE_REMOTE_REPO_DIR:-}"
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--repo-dir)
|
||||
REPO_DIR="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
--delegate-host)
|
||||
DELEGATE_HOST="${2:-}"
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ $# -lt 1 ]]; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SIBLING_SCRIPT="${SCRIPT_DIR}/cluster_power_recovery.sh"
|
||||
REPO_SCRIPT="${REPO_DIR}/scripts/cluster_power_recovery.sh"
|
||||
LOCAL_SCRIPT=""
|
||||
|
||||
if [[ -x "${SIBLING_SCRIPT}" ]]; then
|
||||
LOCAL_SCRIPT="${SIBLING_SCRIPT}"
|
||||
elif [[ -x "${REPO_SCRIPT}" ]]; then
|
||||
LOCAL_SCRIPT="${REPO_SCRIPT}"
|
||||
fi
|
||||
|
||||
if [[ -n "${LOCAL_SCRIPT}" ]] && command -v kubectl >/dev/null 2>&1; then
|
||||
exec "${LOCAL_SCRIPT}" "$@"
|
||||
fi
|
||||
|
||||
if [[ -z "${DELEGATE_HOST}" ]]; then
|
||||
echo "cluster-power-console: no usable local recovery script found and no delegate host configured" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
quoted_args="$(printf '%q ' "$@")"
|
||||
quoted_repo_dir="$(printf '%q' "${REPO_DIR}")"
|
||||
|
||||
remote_cmd=""
|
||||
if [[ -n "${REMOTE_REPO_DIR}" ]]; then
|
||||
remote_cmd+="ANANKE_REPO_DIR=$(printf '%q' "${REMOTE_REPO_DIR}") "
|
||||
fi
|
||||
remote_cmd+="if [ -x ~/ananke-tools/cluster_power_recovery.sh ]; then ~/ananke-tools/cluster_power_recovery.sh ${quoted_args}; elif [ -x ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ]; then ${quoted_repo_dir}/scripts/cluster_power_recovery.sh ${quoted_args}; else echo 'cluster-power-console: remote recovery script not found' >&2; exit 1; fi"
|
||||
|
||||
exec ssh -o BatchMode=yes -o ConnectTimeout=8 "${DELEGATE_HOST}" "${remote_cmd}"
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -539,9 +539,9 @@ def main() -> int:
|
||||
help="Write generated files (otherwise just print a summary).",
|
||||
)
|
||||
ap.add_argument(
|
||||
"--sync-comms",
|
||||
"--sync-atlasbot",
|
||||
action="store_true",
|
||||
help="Mirror rendered knowledge into services/comms/knowledge for atlasbot.",
|
||||
help="Mirror rendered knowledge into services/atlasbot/knowledge for atlasbot.",
|
||||
)
|
||||
args = ap.parse_args()
|
||||
|
||||
@ -632,10 +632,10 @@ def main() -> int:
|
||||
print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}")
|
||||
print(f"Wrote {metrics_json_path.relative_to(REPO_ROOT)}")
|
||||
|
||||
if args.sync_comms:
|
||||
comms_dir = REPO_ROOT / "services" / "comms" / "knowledge"
|
||||
_sync_tree(out_dir, comms_dir)
|
||||
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {comms_dir.relative_to(REPO_ROOT)}")
|
||||
if args.sync_atlasbot:
|
||||
atlasbot_dir = REPO_ROOT / "services" / "atlasbot" / "knowledge"
|
||||
_sync_tree(out_dir, atlasbot_dir)
|
||||
print(f"Synced {out_dir.relative_to(REPO_ROOT)} -> {atlasbot_dir.relative_to(REPO_ROOT)}")
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
@ -1,163 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
usage() {
|
||||
cat <<USAGE
|
||||
Usage: scripts/node_recover.sh <node-name> [options]
|
||||
|
||||
Options:
|
||||
--yes Skip confirmation prompt
|
||||
--skip-drain Do not cordon/drain; only capture recovery artifacts
|
||||
--delete-node Delete Node object after drain (for hard-dead node replacement)
|
||||
--out-dir <dir> Recovery artifact directory (default: ./artifacts/node-recovery)
|
||||
-h, --help Show this help
|
||||
USAGE
|
||||
}
|
||||
|
||||
if ! command -v kubectl >/dev/null 2>&1; then
|
||||
echo "kubectl is required" >&2
|
||||
exit 1
|
||||
fi
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
echo "jq is required" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "$#" -lt 1 ]; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
node=""
|
||||
assume_yes="false"
|
||||
skip_drain="false"
|
||||
delete_node="false"
|
||||
out_dir="./artifacts/node-recovery"
|
||||
|
||||
while [ "$#" -gt 0 ]; do
|
||||
case "$1" in
|
||||
--yes)
|
||||
assume_yes="true"
|
||||
shift
|
||||
;;
|
||||
--skip-drain)
|
||||
skip_drain="true"
|
||||
shift
|
||||
;;
|
||||
--delete-node)
|
||||
delete_node="true"
|
||||
shift
|
||||
;;
|
||||
--out-dir)
|
||||
out_dir="$2"
|
||||
shift 2
|
||||
;;
|
||||
-h|--help)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
-*)
|
||||
echo "Unknown option: $1" >&2
|
||||
usage
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
if [ -z "${node}" ]; then
|
||||
node="$1"
|
||||
else
|
||||
echo "Unexpected argument: $1" >&2
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "${node}" ]; then
|
||||
echo "Node name is required" >&2
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! kubectl get node "${node}" >/dev/null 2>&1; then
|
||||
echo "Node ${node} not found in cluster API" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ "${assume_yes}" != "true" ]; then
|
||||
echo "About to prepare recovery workflow for node: ${node}"
|
||||
echo "skip_drain=${skip_drain} delete_node=${delete_node}"
|
||||
read -r -p "Type the node name to continue: " confirm
|
||||
if [ "${confirm}" != "${node}" ]; then
|
||||
echo "Confirmation did not match node name; aborting."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
timestamp="$(date +%Y%m%d-%H%M%S)"
|
||||
artifacts_dir="${out_dir}/${node}-${timestamp}"
|
||||
mkdir -p "${artifacts_dir}"
|
||||
|
||||
echo "Saving node and workload artifacts to ${artifacts_dir}"
|
||||
kubectl get node "${node}" -o json > "${artifacts_dir}/node.json"
|
||||
kubectl get node "${node}" --show-labels > "${artifacts_dir}/node.txt"
|
||||
kubectl get pods -A --field-selector "spec.nodeName=${node}" -o wide > "${artifacts_dir}/pods-on-node.txt"
|
||||
|
||||
jq -r '
|
||||
.metadata.labels
|
||||
| to_entries[]
|
||||
| select(
|
||||
.key != "kubernetes.io/hostname"
|
||||
and .key != "beta.kubernetes.io/hostname"
|
||||
and .key != "node.kubernetes.io/instance-type"
|
||||
and .key != "beta.kubernetes.io/instance-type"
|
||||
and (.key | startswith("kubernetes.io/") | not)
|
||||
and (.key | startswith("beta.kubernetes.io/") | not)
|
||||
and (.key | startswith("node.kubernetes.io/") | not)
|
||||
)
|
||||
| "kubectl label node <replacement-node> " + .key + "=" + .value + " --overwrite"
|
||||
' "${artifacts_dir}/node.json" > "${artifacts_dir}/restore-labels.sh"
|
||||
|
||||
jq -r '
|
||||
(.spec.taints // [])[]
|
||||
| "kubectl taint node <replacement-node> "
|
||||
+ .key
|
||||
+ (if .value then "=" + .value else "" end)
|
||||
+ ":"
|
||||
+ .effect
|
||||
+ " --overwrite"
|
||||
' "${artifacts_dir}/node.json" > "${artifacts_dir}/restore-taints.sh"
|
||||
|
||||
chmod +x "${artifacts_dir}/restore-labels.sh" "${artifacts_dir}/restore-taints.sh"
|
||||
|
||||
if [ "${skip_drain}" != "true" ]; then
|
||||
echo "Cordoning ${node}"
|
||||
kubectl cordon "${node}" || true
|
||||
|
||||
echo "Draining ${node}"
|
||||
if ! kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m; then
|
||||
echo "Standard drain failed; retrying with --force"
|
||||
if ! kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m --force; then
|
||||
echo "Force drain failed; retrying with --disable-eviction"
|
||||
kubectl drain "${node}" --ignore-daemonsets --delete-emptydir-data --grace-period=30 --timeout=20m --force --disable-eviction
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "${delete_node}" = "true" ]; then
|
||||
echo "Deleting node object ${node}"
|
||||
kubectl delete node "${node}" || true
|
||||
fi
|
||||
|
||||
cat <<NEXT
|
||||
Recovery prep complete for ${node}.
|
||||
Artifacts: ${artifacts_dir}
|
||||
|
||||
Next steps:
|
||||
1) Reimage/reprovision replacement host.
|
||||
2) Rejoin k3s and wait for node Ready.
|
||||
3) Reapply labels: ${artifacts_dir}/restore-labels.sh
|
||||
4) Reapply taints: ${artifacts_dir}/restore-taints.sh
|
||||
5) Validate pods and uncordon replacement when ready.
|
||||
NEXT
|
||||
@ -4,21 +4,13 @@ import pathlib
|
||||
|
||||
def load_module():
    """Load ``dashboards_render_atlas.py`` from the parent directory as a module.

    The file is resolved relative to this test file (one directory up) and
    executed under the module name ``dashboards_render_atlas``.

    Returns:
        The freshly executed module object.

    Raises:
        AssertionError: if no import spec or loader can be built for the path.
    """
    path = pathlib.Path(__file__).resolve().parents[1] / "dashboards_render_atlas.py"
    spec = importlib.util.spec_from_file_location("dashboards_render_atlas", path)
    # Check the spec before using it: module_from_spec() would otherwise fail
    # with an unrelated AttributeError when the spec could not be created.
    assert spec is not None and spec.loader is not None
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module
|
||||
|
||||
|
||||
def flatten_panels(panels):
    """Return top-level panels followed in place by their direct children.

    Each panel is emitted first, then the entries of its ``"panels"`` list
    (if any). Only one level of nesting is expanded.

    Args:
        panels: iterable of panel dicts.

    Returns:
        A new list; the input is not modified.
    """
    flat = []
    for panel in panels:
        flat.append(panel)
        # "panels" may be absent or explicitly None; both mean "no children".
        flat.extend(panel.get("panels") or [])
    return flat
|
||||
|
||||
|
||||
def test_table_panel_options_and_filterable():
|
||||
mod = load_module()
|
||||
panel = mod.table_panel(
|
||||
@ -50,18 +42,6 @@ def test_node_filter_and_expr_helpers():
|
||||
assert "node_memory_MemAvailable_bytes" in mem_expr
|
||||
|
||||
|
||||
def test_overview_availability_panel_uses_recorded_365d_rollup():
    """Panel 27 of the overview must read the recorded 365d availability series."""
    overview = load_module().build_overview()
    availability = next(
        candidate
        for candidate in flatten_panels(overview["panels"])
        if candidate["id"] == 27
    )

    assert availability["title"] == "Atlas Availability (365d)"
    first_target = availability["targets"][0]
    assert first_target["expr"] == 'last_over_time(atlas:availability:ratio_365d{scope="atlas"}[30m])'
    assert first_target["instant"] is True
    for phrase in ("precomputed", "scrape gaps are ignored"):
        assert phrase in availability["description"]
|
||||
|
||||
|
||||
def test_render_configmap_writes(tmp_path):
|
||||
mod = load_module()
|
||||
mod.DASHBOARD_DIR = tmp_path / "dash"
|
||||
@ -76,93 +56,3 @@ def test_render_configmap_writes(tmp_path):
|
||||
content = (tmp_path / "cm.yaml").read_text()
|
||||
assert "kind: ConfigMap" in content
|
||||
assert f"{uid}.json" in content
|
||||
|
||||
|
||||
def test_testing_suite_variable_uses_canonical_values_only():
    """The suite variable must expose only canonical suite names, never legacy aliases."""
    mod = load_module()
    suite_var = mod.testing_suite_variable()
    expected_all = "|".join(mod.PLATFORM_TEST_SUITE_NAMES)
    legacy_names = {"bstein-home", "data-prepper", "titan-iac", "pegasus-health"}

    assert suite_var["allValue"] == expected_all
    # No legacy alias may leak into either the query or the "all" matcher.
    for field in ("query", "allValue"):
        assert not any(alias in suite_var[field] for alias in legacy_names)
    option_values = [entry["value"] for entry in suite_var["options"]]
    assert option_values == mod.PLATFORM_TEST_SUITE_NAMES
|
||||
|
||||
|
||||
def test_jobs_dashboard_separates_current_gate_health_from_reliability():
    """Gate health and run reliability must be distinct panels with their own queries."""
    jobs = load_module().build_jobs_dashboard()
    by_title = {}
    for entry in flatten_panels(jobs["panels"]):
        by_title[entry["title"]] = entry

    for expected_title in (
        "Current Gate Health by Suite",
        "Run Reliability by Suite (24h)",
        "Run Reliability History by Suite",
    ):
        assert expected_title in by_title
    for retired_title in ("Failures by Suite (24h)", "Success Rate by Suite (24h)"):
        assert retired_title not in by_title

    gate_expr = by_title["Current Gate Health by Suite"]["targets"][0]["expr"]
    assert 'check)' in gate_expr
    assert 'result=~"ok|passed|success|not_applicable|skipped|na|n/a"' in gate_expr

    reliability = by_title["Run Reliability by Suite (24h)"]
    rel_expr = reliability["targets"][0]["expr"]
    for fragment in ("platform_quality_gate_runs_total", "> 0", "- 1"):
        assert fragment in rel_expr
    # -1 is the sentinel rendered as "no runs".
    assert reliability["fieldConfig"]["defaults"]["mappings"] == [
        {"type": "value", "options": {"-1": {"text": "no runs"}}}
    ]
|
||||
|
||||
|
||||
def test_jobs_dashboard_bar_gauges_use_solid_threshold_colors():
    """Every bar gauge uses basic display mode and threshold-driven colors."""
    every_panel = flatten_panels(load_module().build_jobs_dashboard()["panels"])
    gauges = []
    for entry in every_panel:
        if entry["type"] == "bargauge":
            gauges.append(entry)

    assert gauges
    for gauge in gauges:
        assert gauge["options"]["displayMode"] == "basic"
    for gauge in gauges:
        assert gauge["fieldConfig"]["defaults"]["color"]["mode"] == "thresholds"

    reliability = next(
        entry for entry in every_panel if entry["title"] == "Run Reliability by Suite (24h)"
    )
    steps = reliability["fieldConfig"]["defaults"]["thresholds"]["steps"]
    for expected_step in (
        {"color": "yellow", "value": 93},
        {"color": "blue", "value": 100},
    ):
        assert expected_step in steps
|
||||
|
||||
|
||||
def test_jobs_dashboard_collapses_heavy_drilldowns_for_light_first_paint():
    """Drilldown panels live inside collapsed rows; only 11 panels query on first paint."""
    top_level = load_module().build_jobs_dashboard()["panels"]
    rows = []
    visible = []
    for entry in top_level:
        if entry["type"] == "row":
            rows.append(entry)
        else:
            visible.append(entry)
    nested_by_title = {}
    for row in rows:
        for child in row.get("panels", []):
            nested_by_title[child["title"]] = child

    assert len(top_level) == 16
    assert len(visible) == 11
    visible_target_count = 0
    for entry in visible:
        visible_target_count += len(entry.get("targets", []))
    assert visible_target_count == 11
    row_titles = [row["title"] for row in rows]
    assert row_titles == [
        "Reliability And Run History",
        "Failure Trends By Check",
        "Success Trends By Check",
        "Test Drilldowns And Problem Tests",
        "Telemetry Completeness, SonarQube, And Branches",
    ]
    assert all(row["collapsed"] for row in rows)

    for nested_title in (
        "Failure Trend: Coverage",
        "Success Trend: Supply Chain",
        "Selected Test Pass Rate History",
        "Missing Coverage Metrics by Suite",
        "SonarQube API Up",
    ):
        assert nested_title in nested_by_title
|
||||
|
||||
@ -1,7 +1,5 @@
|
||||
import importlib.util
|
||||
import pathlib
|
||||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
@ -22,26 +20,6 @@ def load_sync_module(monkeypatch):
|
||||
}
|
||||
for k, v in env.items():
|
||||
monkeypatch.setenv(k, v)
|
||||
fake_psycopg2 = types.ModuleType("psycopg2")
|
||||
fake_psycopg2.Error = Exception
|
||||
fake_psycopg2.connect = lambda **kwargs: None
|
||||
fake_psycopg2_extras = types.ModuleType("psycopg2.extras")
|
||||
fake_psycopg2_extras.RealDictCursor = object
|
||||
fake_passlib = types.ModuleType("passlib")
|
||||
fake_passlib_hash = types.ModuleType("passlib.hash")
|
||||
|
||||
class _FakeBcryptSha256:
|
||||
@staticmethod
|
||||
def hash(password):
|
||||
return f"stub:{password}"
|
||||
|
||||
fake_passlib_hash.bcrypt_sha256 = _FakeBcryptSha256
|
||||
fake_passlib.hash = fake_passlib_hash
|
||||
|
||||
monkeypatch.setitem(sys.modules, "psycopg2", fake_psycopg2)
|
||||
monkeypatch.setitem(sys.modules, "psycopg2.extras", fake_psycopg2_extras)
|
||||
monkeypatch.setitem(sys.modules, "passlib", fake_passlib)
|
||||
monkeypatch.setitem(sys.modules, "passlib.hash", fake_passlib_hash)
|
||||
module_path = (
|
||||
pathlib.Path(__file__).resolve().parents[2]
|
||||
/ "services"
|
||||
@ -138,100 +116,6 @@ def test_kc_get_users_paginates(monkeypatch):
|
||||
assert sync.SESSION.calls == 1
|
||||
|
||||
|
||||
def test_kc_get_users_fetches_second_page_after_full_batch(monkeypatch):
    """A full first page of 200 users must trigger a second request at offset 200."""
    sync = load_sync_module(monkeypatch)

    class _TwoPageSession:
        """Serves 200 users on the first call and a single user afterwards."""

        def __init__(self):
            self.calls = 0
            self.offsets = []

        def get(self, *_, **kwargs):
            self.calls += 1
            self.offsets.append(kwargs["params"]["first"])
            if self.calls != 1:
                return _FakeResponse([{"id": "last"}])
            return _FakeResponse([{"id": f"u{i}"} for i in range(200)])

    session = _TwoPageSession()
    sync.SESSION = session

    fetched = sync.kc_get_users("tok")

    assert len(fetched) == 201
    assert sync.SESSION.offsets == [0, 200]
|
||||
|
||||
|
||||
def test_get_kc_token_posts_client_credentials(monkeypatch):
    """get_kc_token() posts a client_credentials grant and returns the access token."""
    sync = load_sync_module(monkeypatch)
    recorded_posts = []

    class _TokenSession:
        """Records each POST and answers with a fixed token payload."""

        def post(self, url, data, timeout):
            recorded_posts.append((url, data, timeout))
            return _FakeResponse({"access_token": "tok"})

    sync.SESSION = _TokenSession()

    assert sync.get_kc_token() == "tok"
    _, posted_form, _ = recorded_posts[0]
    assert posted_form["grant_type"] == "client_credentials"
|
||||
|
||||
|
||||
def test_retry_request_retries_then_succeeds(monkeypatch):
    """One transient failure is retried after a single 2-second sleep."""
    sync = load_sync_module(monkeypatch)
    call_log = []
    naps = []

    def _fail_once():
        call_log.append(1)
        if len(call_log) > 1:
            return "ok"
        raise sync.requests.RequestException("temporary")

    def _record_sleep(seconds):
        naps.append(seconds)

    monkeypatch.setattr(sync.time, "sleep", _record_sleep)

    outcome = sync.retry_request("request", _fail_once, attempts=2)
    assert outcome == "ok"
    assert naps == [2]
|
||||
|
||||
|
||||
def test_retry_request_reraises_final_error(monkeypatch):
    """With a single attempt, the request error propagates to the caller."""
    sync = load_sync_module(monkeypatch)

    def _no_sleep(seconds):
        return None

    def _always_fails():
        raise sync.requests.RequestException("nope")

    monkeypatch.setattr(sync.time, "sleep", _no_sleep)

    with pytest.raises(sync.requests.RequestException):
        sync.retry_request("request", _always_fails, attempts=1)
|
||||
|
||||
|
||||
def test_retry_db_connect_retries_then_succeeds(monkeypatch):
    """A failed first connection is retried after a single 2-second sleep."""
    sync = load_sync_module(monkeypatch)
    connect_calls = []
    naps = []

    def _connect(**kwargs):
        connect_calls.append(kwargs)
        if len(connect_calls) > 1:
            return "conn"
        raise sync.psycopg2.Error("not yet")

    def _record_sleep(seconds):
        naps.append(seconds)

    monkeypatch.setattr(sync.psycopg2, "connect", _connect)
    monkeypatch.setattr(sync.time, "sleep", _record_sleep)

    connection = sync.retry_db_connect(attempts=2)
    assert connection == "conn"
    assert naps == [2]
|
||||
|
||||
|
||||
def test_retry_db_connect_reraises_final_error(monkeypatch):
    """With a single attempt, the database error propagates to the caller."""
    sync = load_sync_module(monkeypatch)

    def _connect_fails(**kwargs):
        raise sync.psycopg2.Error("down")

    def _no_sleep(seconds):
        return None

    monkeypatch.setattr(sync.psycopg2, "connect", _connect_fails)
    monkeypatch.setattr(sync.time, "sleep", _no_sleep)

    with pytest.raises(sync.psycopg2.Error):
        sync.retry_db_connect(attempts=1)
|
||||
|
||||
|
||||
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
||||
sync = load_sync_module(monkeypatch)
|
||||
executed = []
|
||||
@ -260,87 +144,6 @@ def test_ensure_mailu_user_upserts(monkeypatch):
|
||||
assert captured["password"] != "pw"
|
||||
|
||||
|
||||
def test_attribute_and_email_helpers(monkeypatch):
    """Covers attribute lookup, the mailu_enabled flag and e-mail resolution."""
    sync = load_sync_module(monkeypatch)

    # get_attribute_value: first list element, None for an empty list, scalars as-is.
    first_of_list = sync.get_attribute_value({"x": ["first", "second"]}, "x")
    assert first_of_list == "first"
    from_empty_list = sync.get_attribute_value({"x": []}, "x")
    assert from_empty_list is None
    from_scalar = sync.get_attribute_value({"x": "value"}, "x")
    assert from_scalar == "value"

    assert sync.mailu_enabled({"mailu_email": ["legacy@example.com"]}) is True
    assert sync.mailu_enabled({"mailu_enabled": ["off"]}) is False

    same_domain_user = {"username": "fallback", "email": "user@example.com"}
    assert sync.resolve_mailu_email(same_domain_user, {}) == "user@example.com"
    other_domain_user = {"username": "fallback", "email": "user@other.com"}
    assert sync.resolve_mailu_email(other_domain_user, {}) == "fallback@example.com"
|
||||
|
||||
|
||||
def test_safe_update_payload_filters_fields(monkeypatch):
    """_safe_update_payload keeps known fields and sanitizes malformed ones."""
    sync = load_sync_module(monkeypatch)

    raw_user = {
        "username": "user",
        "enabled": True,
        "email": "user@example.com",
        "emailVerified": False,
        "firstName": "User",
        "lastName": "Example",
        "requiredActions": ["UPDATE_PASSWORD", 7],
        "attributes": "not-a-dict",
        "ignored": "value",
    }
    expected = {
        "username": "user",
        "enabled": True,
        "email": "user@example.com",
        "emailVerified": False,
        "firstName": "User",
        "lastName": "Example",
        "requiredActions": ["UPDATE_PASSWORD"],
        "attributes": {},
    }

    cleaned = sync._safe_update_payload(raw_user)

    assert cleaned == expected
|
||||
|
||||
|
||||
def test_ensure_system_mailboxes_handles_configurations(monkeypatch, capsys):
    """Missing password is reported; per-mailbox failures are logged, not raised."""
    sync = load_sync_module(monkeypatch)
    seen = []
    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", ["postmaster@example.com", "abuse"])

    # First pass: no system password configured.
    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "")
    sync.ensure_system_mailboxes(object())
    first_output = capsys.readouterr().out
    assert "MAILU_SYSTEM_PASSWORD is missing" in first_output

    def _ensure(cursor, email, password, display_name):
        seen.append((email, password, display_name))
        if email == "abuse":
            raise RuntimeError("boom")

    # Second pass: password present, second mailbox blows up.
    monkeypatch.setattr(sync, "MAILU_SYSTEM_PASSWORD", "pw")
    monkeypatch.setattr(sync, "ensure_mailu_user", _ensure)
    sync.ensure_system_mailboxes(object())
    second_output = capsys.readouterr().out

    assert seen == [
        ("postmaster@example.com", "pw", "postmaster"),
        ("abuse", "pw", "abuse"),
    ]
    assert "Ensured system mailbox for postmaster@example.com" in second_output
    assert "Failed to ensure system mailbox abuse" in second_output
|
||||
|
||||
|
||||
def test_main_exits_without_users_or_system_mailboxes(monkeypatch, capsys):
    """main() prints an exit notice when there are no users and no system mailboxes."""
    sync = load_sync_module(monkeypatch)

    def _fake_token():
        return "tok"

    def _no_users(token):
        return []

    monkeypatch.setattr(sync, "MAILU_SYSTEM_USERS", [])
    monkeypatch.setattr(sync, "get_kc_token", _fake_token)
    monkeypatch.setattr(sync, "kc_get_users", _no_users)

    sync.main()

    printed = capsys.readouterr().out
    assert "No users found; exiting." in printed
|
||||
|
||||
|
||||
def test_main_generates_password_and_upserts(monkeypatch):
|
||||
sync = load_sync_module(monkeypatch)
|
||||
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
|
||||
|
||||
@ -1,134 +0,0 @@
|
||||
import importlib.util
|
||||
import io
|
||||
import pathlib
|
||||
import types
|
||||
|
||||
|
||||
def load_listener_module(monkeypatch):
    """Execute ``services/mailu/scripts/mailu_sync_listener.py`` as a fresh module.

    ``MAILU_SYNC_WAIT_TIMEOUT_SEC`` is set to ``"0"`` before the module is executed.
    The script path is resolved two directories above this test file.
    """
    monkeypatch.setenv("MAILU_SYNC_WAIT_TIMEOUT_SEC", "0")
    repo_root = pathlib.Path(__file__).resolve().parents[2]
    script = repo_root / "services" / "mailu" / "scripts" / "mailu_sync_listener.py"
    spec = importlib.util.spec_from_file_location("mailu_sync_listener_testmod", script)
    listener = importlib.util.module_from_spec(spec)
    assert spec.loader is not None
    spec.loader.exec_module(listener)
    return listener
|
||||
|
||||
|
||||
def _handler_for(listener, body):
|
||||
handler = listener.Handler.__new__(listener.Handler)
|
||||
raw = body if isinstance(body, bytes) else body.encode()
|
||||
handler.headers = {"Content-Length": str(len(raw))}
|
||||
handler.rfile = io.BytesIO(raw)
|
||||
handler.responses = []
|
||||
handler.headers_ended = 0
|
||||
handler.send_response = lambda code: handler.responses.append(code)
|
||||
handler.end_headers = lambda: setattr(handler, "headers_ended", handler.headers_ended + 1)
|
||||
return handler
|
||||
|
||||
|
||||
def test_listener_run_sync_blocking_updates_state(monkeypatch):
    """A blocking run stores rc and timestamp; a run while already syncing returns 0."""
    listener = load_listener_module(monkeypatch)

    def _frozen_clock():
        return 42.0

    def _fake_run(command, check):
        return types.SimpleNamespace(returncode=3)

    monkeypatch.setattr(listener, "time", _frozen_clock)
    monkeypatch.setattr(listener.subprocess, "run", _fake_run)

    rc = listener._run_sync_blocking()
    assert rc == 3
    assert listener.last_rc == 3
    assert listener.last_run == 42.0
    assert listener.sync_done.is_set()

    listener.sync_running = True
    assert listener._run_sync_blocking() == 0
|
||||
|
||||
|
||||
def test_listener_trigger_sync_async_honors_running_and_debounce(monkeypatch):
    """Async trigger is refused while running or debounced, unless forced."""
    listener = load_listener_module(monkeypatch)
    started = []

    class _RecordingThread:
        """Stand-in for threading.Thread that only records start() calls."""

        def __init__(self, target, daemon):
            self.target = target
            self.daemon = daemon

        def start(self):
            started.append((self.target, self.daemon))

    def _frozen_clock():
        return 100.0

    monkeypatch.setattr(listener.threading, "Thread", _RecordingThread)
    monkeypatch.setattr(listener, "time", _frozen_clock)

    # A sync already in flight blocks a new one.
    listener.sync_running = True
    assert listener._trigger_sync_async() is False

    # A run 5 seconds before "now" is refused without force.
    listener.sync_running = False
    listener.last_run = 95.0
    assert listener._trigger_sync_async() is False

    assert listener._trigger_sync_async(force=True) is True
    assert started and started[0][1] is True
|
||||
|
||||
|
||||
def test_listener_post_rejects_invalid_json(monkeypatch):
    """A malformed JSON body yields a single 400 response."""
    listener = load_listener_module(monkeypatch)
    request = _handler_for(listener, b"{not-json")

    request.do_POST()

    assert request.responses == [400]
    assert request.headers_ended == 1
|
||||
|
||||
|
||||
def test_listener_post_triggers_async_without_wait(monkeypatch):
    """Without "wait", POST triggers an async sync and answers 202."""
    listener = load_listener_module(monkeypatch)
    force_flags = []

    def _fake_trigger(force=False):
        force_flags.append(force)
        return True

    monkeypatch.setattr(listener, "_trigger_sync_async", _fake_trigger)
    request = _handler_for(listener, '{"force": true}')

    request.do_POST()

    assert force_flags == [True]
    assert request.responses == [202]
|
||||
|
||||
|
||||
def test_listener_post_wait_returns_success_or_failure(monkeypatch):
    """With "wait", the status is 200 when last_rc is 0 and 500 when it is 2."""
    listener = load_listener_module(monkeypatch)
    force_flags = []

    def _fake_trigger(force=False):
        force_flags.append(force)
        return True

    monkeypatch.setattr(listener, "_trigger_sync_async", _fake_trigger)
    listener.sync_running = False

    listener.last_rc = 0
    ok_request = _handler_for(listener, '{"wait": true, "force": true}')
    ok_request.do_POST()
    assert force_flags == [True]
    assert ok_request.responses == [200]

    listener.last_rc = 2
    failed_request = _handler_for(listener, '{"wait": true}')
    failed_request.do_POST()
    assert failed_request.responses == [500]
|
||||
|
||||
|
||||
def test_listener_post_wait_keeps_running_request_successful(monkeypatch):
    """A waiting POST issued while a sync is running is answered with 200."""
    listener = load_listener_module(monkeypatch)
    listener.sync_running = True
    request = _handler_for(listener, '{"wait": true}')

    request.do_POST()

    assert request.responses == [200]
|
||||
|
||||
|
||||
def test_listener_log_message_is_quiet(monkeypatch):
    """Handler.log_message() returns None."""
    listener = load_listener_module(monkeypatch)
    silent_handler = listener.Handler.__new__(listener.Handler)

    result = silent_handler.log_message("ignored %s", "value")

    assert result is None
|
||||
@ -1,73 +0,0 @@
|
||||
#!/usr/bin/env bash
# Verify that the Ariadne Jenkins workspace cleanup job is wired up after a
# Flux reconcile: env vars on the deployment, exported metrics, recent logs.
#
# Usage: <script> [dry-run|active]        (default: dry-run)
#
# Environment overrides (defaults in parentheses):
#   KUSTOMIZATION        (maintenance)  Flux kustomization to reconcile
#   NAMESPACE            (maintenance)  namespace of the deployment
#   DEPLOYMENT           (ariadne)      deployment to inspect
#   LOCAL_METRICS_PORT   (18080)        local port for the port-forward
#   METRICS_WAIT_SECONDS (15)           how long to wait for /metrics
#
# Exit status: 0 on success, 2 on usage/missing tools, non-zero on any failed
# verification step.
set -euo pipefail

MODE="${1:-dry-run}"
if [[ "$MODE" != "dry-run" && "$MODE" != "active" ]]; then
  echo "usage: $0 [dry-run|active]" >&2
  exit 2
fi

EXPECTED_DRY_RUN="true"
PROM_MODE="dry_run"
if [[ "$MODE" == "active" ]]; then
  EXPECTED_DRY_RUN="false"
  PROM_MODE="delete"
fi

KUSTOMIZATION="${KUSTOMIZATION:-maintenance}"
NAMESPACE="${NAMESPACE:-maintenance}"
DEPLOYMENT="${DEPLOYMENT:-ariadne}"
LOCAL_METRICS_PORT="${LOCAL_METRICS_PORT:-18080}"
METRICS_WAIT_SECONDS="${METRICS_WAIT_SECONDS:-15}"

for cmd in flux kubectl curl grep awk; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "missing required command: $cmd" >&2
    exit 2
  fi
done

# Temp files and the port-forward PID are cleaned up on every exit path. The
# trap is installed before anything is created so a partial setup cannot leak.
PF_LOG=""
METRICS_FILE=""
PF_PID=""
cleanup() {
  if [[ -n "${PF_PID:-}" ]]; then
    kill "$PF_PID" >/dev/null 2>&1 || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  if [[ -n "$PF_LOG" ]]; then
    rm -f -- "$PF_LOG"
  fi
  if [[ -n "$METRICS_FILE" ]]; then
    rm -f -- "$METRICS_FILE"
  fi
}
trap cleanup EXIT

# Assert that the deployment env dump contains exactly the line NAME=value.
# Whole-line matching (-x) keeps "=24" from also accepting "=240".
require_env() {
  local expected=$1
  if ! grep -Fx -- "$expected" <<<"$ENV_DUMP"; then
    echo "env wiring mismatch, expected: ${expected}" >&2
    exit 1
  fi
}

echo "[1/5] reconcile Flux kustomization: ${KUSTOMIZATION}"
flux reconcile kustomization "$KUSTOMIZATION" --namespace flux-system --with-source

echo "[2/5] wait for deployment rollout"
kubectl -n "$NAMESPACE" rollout status "deployment/$DEPLOYMENT" --timeout=5m

echo "[3/5] verify ariadne env wiring"
ENV_DUMP="$(kubectl -n "$NAMESPACE" get deployment "$DEPLOYMENT" -o jsonpath='{range .spec.template.spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}')"
require_env "ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP=45 */6 * * *"
require_env "JENKINS_WORKSPACE_NAMESPACE=jenkins"
require_env "JENKINS_WORKSPACE_PVC_PREFIX=pvc-workspace-"
require_env "JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS=24"
require_env "JENKINS_WORKSPACE_CLEANUP_DRY_RUN=${EXPECTED_DRY_RUN}"
require_env "JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN=20"

echo "[4/5] scrape /metrics and confirm cleanup metrics are exported"
PF_LOG="$(mktemp)"
METRICS_FILE="$(mktemp)"

kubectl -n "$NAMESPACE" port-forward "deployment/$DEPLOYMENT" "${LOCAL_METRICS_PORT}:8080" >"$PF_LOG" 2>&1 &
PF_PID=$!

# Poll instead of a fixed sleep: the tunnel may need more (or less) than two
# seconds, and a dead port-forward should fail fast with its log attached.
scraped="false"
for ((attempt = 0; attempt < METRICS_WAIT_SECONDS; attempt++)); do
  if ! kill -0 "$PF_PID" 2>/dev/null; then
    break
  fi
  if curl -fsS "http://127.0.0.1:${LOCAL_METRICS_PORT}/metrics" >"$METRICS_FILE" 2>/dev/null; then
    scraped="true"
    break
  fi
  sleep 1
done
if [[ "$scraped" != "true" ]]; then
  echo "could not scrape /metrics through the port-forward; port-forward log:" >&2
  cat -- "$PF_LOG" >&2 || true
  exit 1
fi

grep -F "# HELP ariadne_jenkins_workspace_cleanup_runs_total" "$METRICS_FILE"
grep -F "# HELP ariadne_jenkins_workspace_cleanup_objects_total" "$METRICS_FILE"

echo "[5/5] show recent cleanup signal"
# Match sample lines only; the HELP/TYPE lines always contain the metric name
# and would otherwise hide the "no sample yet" case.
if ! grep -E "^ariadne_jenkins_workspace_cleanup_runs_total\{[^}]*mode=\"${PROM_MODE}\"" "$METRICS_FILE"; then
  echo "No run counter sample yet for mode=${PROM_MODE}; wait for schedule window and re-run." >&2
fi

echo "Recent cleanup logs (if any):"
# Best effort: an empty grep result must not fail the verification.
kubectl -n "$NAMESPACE" logs "deployment/$DEPLOYMENT" --tail=500 | grep -i "jenkins workspace cleanup" | tail -n 20 || true

echo "verification complete for mode=${MODE}"
|
||||
@ -5,7 +5,7 @@ metadata:
|
||||
name: ollama
|
||||
namespace: ai
|
||||
spec:
|
||||
replicas: 0
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 2
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
@ -21,7 +21,7 @@ spec:
|
||||
app: ollama
|
||||
annotations:
|
||||
ai.bstein.dev/model: qwen2.5:14b-instruct-q4_0
|
||||
ai.bstein.dev/gpu: GPU pool (titan-20/21)
|
||||
ai.bstein.dev/gpu: GPU pool (titan-22/24)
|
||||
ai.bstein.dev/restartedAt: "2026-01-26T12:00:00Z"
|
||||
spec:
|
||||
affinity:
|
||||
@ -32,13 +32,13 @@ spec:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values:
|
||||
- titan-20
|
||||
- titan-21
|
||||
- titan-22
|
||||
- titan-24
|
||||
runtimeClassName: nvidia
|
||||
volumes:
|
||||
- name: models
|
||||
persistentVolumeClaim:
|
||||
claimName: ollama-models-asteria
|
||||
claimName: ollama-models
|
||||
initContainers:
|
||||
- name: warm-model
|
||||
image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
|
||||
|
||||
@ -2,12 +2,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: ollama-models-asteria
|
||||
name: ollama-models
|
||||
namespace: ai
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
- ReadWriteOnce
|
||||
resources:
|
||||
requests:
|
||||
storage: 30Gi
|
||||
storageClassName: asteria
|
||||
storageClassName: astreae
|
||||
|
||||
@ -3,7 +3,7 @@ apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: comms
|
||||
namespace: ai
|
||||
labels:
|
||||
app: atlasbot
|
||||
spec:
|
||||
@ -16,9 +16,9 @@ spec:
|
||||
labels:
|
||||
app: atlasbot
|
||||
annotations:
|
||||
checksum/atlasbot-configmap: manual-atlasbot-103
|
||||
checksum/atlasbot-configmap: manual-atlasbot-101
|
||||
vault.hashicorp.com/agent-inject: "true"
|
||||
vault.hashicorp.com/role: "comms"
|
||||
vault.hashicorp.com/role: "ai"
|
||||
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
|
||||
vault.hashicorp.com/agent-inject-template-turn-secret: |
|
||||
{{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
|
||||
@ -67,17 +67,17 @@ spec:
|
||||
hardware: rpi5
|
||||
containers:
|
||||
- name: atlasbot
|
||||
image: python:3.11-slim
|
||||
image: registry.bstein.dev/bstein/atlasbot:0.1.0-55
|
||||
command: ["/bin/sh","-c"]
|
||||
args:
|
||||
- |
|
||||
. /vault/scripts/comms_vault_env.sh
|
||||
exec python /app/bot.py
|
||||
. /vault/scripts/atlasbot_vault_env.sh
|
||||
exec python -m atlasbot.main
|
||||
env:
|
||||
- name: MATRIX_BASE
|
||||
value: http://othrys-synapse-matrix-synapse:8008
|
||||
value: http://othrys-synapse-matrix-synapse.comms.svc.cluster.local:8008
|
||||
- name: AUTH_BASE
|
||||
value: http://matrix-authentication-service:8080
|
||||
value: http://matrix-authentication-service.comms.svc.cluster.local:8080
|
||||
- name: KB_DIR
|
||||
value: /kb
|
||||
- name: VM_URL
|
||||
@ -93,7 +93,7 @@ spec:
|
||||
- name: BOT_USER_GENIUS
|
||||
value: atlas-genius
|
||||
- name: BOT_MENTIONS
|
||||
value: atlas-quick,atlas-smart,atlas-genius,atlas_quick,atlas_smart,atlas_genius
|
||||
value: atlas-quick,atlas-smart,atlas-genius
|
||||
- name: OLLAMA_URL
|
||||
value: http://ollama.ai.svc.cluster.local:11434
|
||||
- name: OLLAMA_MODEL
|
||||
@ -104,26 +104,50 @@ spec:
|
||||
value: qwen2.5:14b-instruct-q4_0
|
||||
- name: ATLASBOT_MODEL_GENIUS
|
||||
value: qwen2.5:14b-instruct-q4_0
|
||||
- name: ATLASBOT_MODEL_DEEP
|
||||
value: qwen2.5:14b-instruct-q4_0
|
||||
- name: OLLAMA_FALLBACK_MODEL
|
||||
value: qwen2.5:14b-instruct-q4_0
|
||||
- name: OLLAMA_TIMEOUT_SEC
|
||||
value: "600"
|
||||
- name: OLLAMA_RETRIES
|
||||
value: "0"
|
||||
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
||||
value: "30"
|
||||
- name: ATLASBOT_QUICK_TIME_BUDGET_SEC
|
||||
value: "15"
|
||||
- name: ATLASBOT_SMART_TIME_BUDGET_SEC
|
||||
value: "45"
|
||||
- name: ATLASBOT_GENIUS_TIME_BUDGET_SEC
|
||||
value: "180"
|
||||
- name: ATLASBOT_OLLAMA_RETRIES
|
||||
value: "0"
|
||||
- name: ATLASBOT_THINKING_INTERVAL_SEC
|
||||
value: "30"
|
||||
- name: ATLASBOT_SNAPSHOT_TTL_SEC
|
||||
value: "30"
|
||||
- name: ATLASBOT_HTTP_PORT
|
||||
value: "8090"
|
||||
- name: ATLASBOT_STATE_DB
|
||||
value: /data/atlasbot_state.db
|
||||
- name: ATLASBOT_QUEUE_ENABLED
|
||||
value: "false"
|
||||
- name: ATLASBOT_DEBUG_PIPELINE
|
||||
value: "true"
|
||||
- name: ATLASBOT_NATS_URL
|
||||
value: nats://nats.nats.svc.cluster.local:4222
|
||||
- name: ATLASBOT_NATS_STREAM
|
||||
value: atlasbot
|
||||
- name: ATLASBOT_NATS_SUBJECT
|
||||
value: atlasbot.requests
|
||||
- name: ATLASBOT_FAST_MAX_ANGLES
|
||||
value: "2"
|
||||
- name: ATLASBOT_SMART_MAX_ANGLES
|
||||
value: "5"
|
||||
- name: ATLASBOT_FAST_MAX_CANDIDATES
|
||||
value: "2"
|
||||
- name: ATLASBOT_SMART_MAX_CANDIDATES
|
||||
value: "6"
|
||||
- name: ATLASBOT_FAST_LLM_CALLS_MAX
|
||||
value: "8"
|
||||
- name: ATLASBOT_SMART_LLM_CALLS_MAX
|
||||
value: "24"
|
||||
- name: ATLASBOT_GENIUS_LLM_CALLS_MAX
|
||||
value: "72"
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8090
|
||||
@ -135,19 +159,15 @@ spec:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
volumeMounts:
|
||||
- name: code
|
||||
mountPath: /app/bot.py
|
||||
subPath: bot.py
|
||||
- name: kb
|
||||
mountPath: /kb
|
||||
readOnly: true
|
||||
- name: vault-scripts
|
||||
mountPath: /vault/scripts
|
||||
readOnly: true
|
||||
- name: atlasbot-state
|
||||
mountPath: /data
|
||||
volumes:
|
||||
- name: code
|
||||
configMap:
|
||||
name: atlasbot
|
||||
- name: kb
|
||||
configMap:
|
||||
name: atlas-kb
|
||||
@ -166,5 +186,7 @@ spec:
|
||||
path: diagrams/atlas-http.mmd
|
||||
- name: vault-scripts
|
||||
configMap:
|
||||
name: comms-vault-env
|
||||
name: atlasbot-vault-env
|
||||
defaultMode: 0555
|
||||
- name: atlasbot-state
|
||||
emptyDir: {}
|
||||
@ -3,7 +3,9 @@ apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: comms
|
||||
namespace: ai
|
||||
imagePullSecrets:
|
||||
- name: harbor-regcred
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
@ -43,5 +45,4 @@ roleRef:
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: atlasbot
|
||||
namespace: comms
|
||||
|
||||
namespace: ai
|
||||
@ -2,7 +2,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: comms
|
||||
namespace: ai
|
||||
labels:
|
||||
app: atlasbot
|
||||
spec:
|
||||
26
services/atlasbot/image-automation.yaml
Normal file
26
services/atlasbot/image-automation.yaml
Normal file
@ -0,0 +1,26 @@
|
||||
# services/atlasbot/image-automation.yaml
|
||||
apiVersion: image.toolkit.fluxcd.io/v1
|
||||
kind: ImageUpdateAutomation
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: ai
|
||||
spec:
|
||||
interval: 1m0s
|
||||
sourceRef:
|
||||
kind: GitRepository
|
||||
name: flux-system
|
||||
namespace: flux-system
|
||||
git:
|
||||
checkout:
|
||||
ref:
|
||||
branch: feature/atlasbot
|
||||
commit:
|
||||
author:
|
||||
name: flux-bot
|
||||
email: ops@bstein.dev
|
||||
messageTemplate: "chore(atlasbot): automated image update"
|
||||
push:
|
||||
branch: feature/atlasbot
|
||||
update:
|
||||
path: services/atlasbot
|
||||
strategy: Setters
|
||||
23
services/atlasbot/image.yaml
Normal file
23
services/atlasbot/image.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
# services/atlasbot/image.yaml
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImageRepository
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: ai
|
||||
spec:
|
||||
image: registry.bstein.dev/bstein/atlasbot
|
||||
interval: 1m0s
|
||||
secretRef:
|
||||
name: harbor-regcred
|
||||
---
|
||||
apiVersion: image.toolkit.fluxcd.io/v1beta2
|
||||
kind: ImagePolicy
|
||||
metadata:
|
||||
name: atlasbot
|
||||
namespace: ai
|
||||
spec:
|
||||
imageRepositoryRef:
|
||||
name: atlasbot
|
||||
policy:
|
||||
semver:
|
||||
range: ">=0.1.0-0"
|
||||
22
services/atlasbot/knowledge/INDEX.md
Normal file
22
services/atlasbot/knowledge/INDEX.md
Normal file
@ -0,0 +1,22 @@
|
||||
Atlas Knowledge Base (KB)
|
||||
|
||||
This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be:
|
||||
- Accurate (grounded in GitOps + read-only cluster tools)
|
||||
- Maintainable (small docs + deterministic generators)
|
||||
- Safe (no secrets; refer to Secret/Vault paths by name only)
|
||||
|
||||
Layout
|
||||
- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown).
|
||||
- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON).
|
||||
- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog.
|
||||
|
||||
Regeneration
|
||||
- Update manifests/docs, then regenerate generated artifacts:
|
||||
- `python scripts/knowledge_render_atlas.py --write`
|
||||
|
||||
Authoring rules
|
||||
- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`.
|
||||
- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths.
|
||||
- Keep each runbook small; one topic per file; use headings.
|
||||
- When in doubt, link to the exact file path in this repo that configures the behavior.
|
||||
|
||||
8
services/atlasbot/knowledge/catalog/atlas-summary.json
Normal file
8
services/atlasbot/knowledge/catalog/atlas-summary.json
Normal file
@ -0,0 +1,8 @@
|
||||
{
|
||||
"counts": {
|
||||
"helmrelease_host_hints": 19,
|
||||
"http_endpoints": 45,
|
||||
"services": 47,
|
||||
"workloads": 74
|
||||
}
|
||||
}
|
||||
3445
services/atlasbot/knowledge/catalog/atlas.json
Normal file
3445
services/atlasbot/knowledge/catalog/atlas.json
Normal file
File diff suppressed because it is too large
Load Diff
1880
services/atlasbot/knowledge/catalog/metrics.json
Normal file
1880
services/atlasbot/knowledge/catalog/metrics.json
Normal file
File diff suppressed because it is too large
Load Diff
97
services/atlasbot/knowledge/catalog/runbooks.json
Normal file
97
services/atlasbot/knowledge/catalog/runbooks.json
Normal file
File diff suppressed because one or more lines are too long
234
services/atlasbot/knowledge/diagrams/atlas-http.mmd
Normal file
234
services/atlasbot/knowledge/diagrams/atlas-http.mmd
Normal file
@ -0,0 +1,234 @@
|
||||
flowchart LR
|
||||
host_auth_bstein_dev["auth.bstein.dev"]
|
||||
svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"]
|
||||
host_auth_bstein_dev --> svc_sso_oauth2_proxy
|
||||
wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"]
|
||||
svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy
|
||||
host_bstein_dev["bstein.dev"]
|
||||
svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"]
|
||||
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend
|
||||
wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"]
|
||||
svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend
|
||||
svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"]
|
||||
host_bstein_dev --> svc_comms_matrix_wellknown
|
||||
wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"]
|
||||
svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown
|
||||
svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"]
|
||||
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
|
||||
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
|
||||
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
|
||||
host_budget_bstein_dev["budget.bstein.dev"]
|
||||
svc_finance_actual_budget["finance/actual-budget (Service)"]
|
||||
host_budget_bstein_dev --> svc_finance_actual_budget
|
||||
wl_finance_actual_budget["finance/actual-budget (Deployment)"]
|
||||
svc_finance_actual_budget --> wl_finance_actual_budget
|
||||
host_call_live_bstein_dev["call.live.bstein.dev"]
|
||||
svc_comms_element_call["comms/element-call (Service)"]
|
||||
host_call_live_bstein_dev --> svc_comms_element_call
|
||||
wl_comms_element_call["comms/element-call (Deployment)"]
|
||||
svc_comms_element_call --> wl_comms_element_call
|
||||
host_chat_ai_bstein_dev["chat.ai.bstein.dev"]
|
||||
svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"]
|
||||
host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway
|
||||
wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"]
|
||||
svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway
|
||||
host_ci_bstein_dev["ci.bstein.dev"]
|
||||
svc_jenkins_jenkins["jenkins/jenkins (Service)"]
|
||||
host_ci_bstein_dev --> svc_jenkins_jenkins
|
||||
wl_jenkins_jenkins["jenkins/jenkins (Deployment)"]
|
||||
svc_jenkins_jenkins --> wl_jenkins_jenkins
|
||||
host_cloud_bstein_dev["cloud.bstein.dev"]
|
||||
svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"]
|
||||
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
|
||||
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
|
||||
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
|
||||
host_health_bstein_dev["health.bstein.dev"]
|
||||
svc_health_wger["health/wger (Service)"]
|
||||
host_health_bstein_dev --> svc_health_wger
|
||||
wl_health_wger["health/wger (Deployment)"]
|
||||
svc_health_wger --> wl_health_wger
|
||||
host_kit_live_bstein_dev["kit.live.bstein.dev"]
|
||||
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
|
||||
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
|
||||
wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"]
|
||||
svc_comms_livekit_token_service --> wl_comms_livekit_token_service
|
||||
svc_comms_livekit["comms/livekit (Service)"]
|
||||
host_kit_live_bstein_dev --> svc_comms_livekit
|
||||
wl_comms_livekit["comms/livekit (Deployment)"]
|
||||
svc_comms_livekit --> wl_comms_livekit
|
||||
host_live_bstein_dev["live.bstein.dev"]
|
||||
host_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
||||
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
||||
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
||||
host_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
||||
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
||||
host_logs_bstein_dev["logs.bstein.dev"]
|
||||
svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"]
|
||||
host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs
|
||||
wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"]
|
||||
svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs
|
||||
host_longhorn_bstein_dev["longhorn.bstein.dev"]
|
||||
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
|
||||
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
|
||||
wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"]
|
||||
svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn
|
||||
host_mail_bstein_dev["mail.bstein.dev"]
|
||||
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
|
||||
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
|
||||
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||
host_monero_bstein_dev["monero.bstein.dev"]
|
||||
svc_crypto_monerod["crypto/monerod (Service)"]
|
||||
host_monero_bstein_dev --> svc_crypto_monerod
|
||||
wl_crypto_monerod["crypto/monerod (Deployment)"]
|
||||
svc_crypto_monerod --> wl_crypto_monerod
|
||||
host_money_bstein_dev["money.bstein.dev"]
|
||||
svc_finance_firefly["finance/firefly (Service)"]
|
||||
host_money_bstein_dev --> svc_finance_firefly
|
||||
wl_finance_firefly["finance/firefly (Deployment)"]
|
||||
svc_finance_firefly --> wl_finance_firefly
|
||||
host_notes_bstein_dev["notes.bstein.dev"]
|
||||
svc_outline_outline["outline/outline (Service)"]
|
||||
host_notes_bstein_dev --> svc_outline_outline
|
||||
wl_outline_outline["outline/outline (Deployment)"]
|
||||
svc_outline_outline --> wl_outline_outline
|
||||
host_office_bstein_dev["office.bstein.dev"]
|
||||
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
|
||||
host_office_bstein_dev --> svc_nextcloud_collabora
|
||||
wl_nextcloud_collabora["nextcloud/collabora (Deployment)"]
|
||||
svc_nextcloud_collabora --> wl_nextcloud_collabora
|
||||
host_pegasus_bstein_dev["pegasus.bstein.dev"]
|
||||
svc_jellyfin_pegasus["jellyfin/pegasus (Service)"]
|
||||
host_pegasus_bstein_dev --> svc_jellyfin_pegasus
|
||||
wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"]
|
||||
svc_jellyfin_pegasus --> wl_jellyfin_pegasus
|
||||
host_scm_bstein_dev["scm.bstein.dev"]
|
||||
svc_gitea_gitea["gitea/gitea (Service)"]
|
||||
host_scm_bstein_dev --> svc_gitea_gitea
|
||||
wl_gitea_gitea["gitea/gitea (Deployment)"]
|
||||
svc_gitea_gitea --> wl_gitea_gitea
|
||||
host_secret_bstein_dev["secret.bstein.dev"]
|
||||
svc_vault_vault["vault/vault (Service)"]
|
||||
host_secret_bstein_dev --> svc_vault_vault
|
||||
wl_vault_vault["vault/vault (StatefulSet)"]
|
||||
svc_vault_vault --> wl_vault_vault
|
||||
host_sso_bstein_dev["sso.bstein.dev"]
|
||||
svc_sso_keycloak["sso/keycloak (Service)"]
|
||||
host_sso_bstein_dev --> svc_sso_keycloak
|
||||
wl_sso_keycloak["sso/keycloak (Deployment)"]
|
||||
svc_sso_keycloak --> wl_sso_keycloak
|
||||
host_stream_bstein_dev["stream.bstein.dev"]
|
||||
svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"]
|
||||
host_stream_bstein_dev --> svc_jellyfin_jellyfin
|
||||
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
|
||||
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
|
||||
host_tasks_bstein_dev["tasks.bstein.dev"]
|
||||
svc_planka_planka["planka/planka (Service)"]
|
||||
host_tasks_bstein_dev --> svc_planka_planka
|
||||
wl_planka_planka["planka/planka (Deployment)"]
|
||||
svc_planka_planka --> wl_planka_planka
|
||||
host_vault_bstein_dev["vault.bstein.dev"]
|
||||
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
|
||||
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
|
||||
wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"]
|
||||
svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden
|
||||
|
||||
subgraph bstein_dev_home[bstein-dev-home]
|
||||
svc_bstein_dev_home_bstein_dev_home_frontend
|
||||
wl_bstein_dev_home_bstein_dev_home_frontend
|
||||
svc_bstein_dev_home_bstein_dev_home_backend
|
||||
wl_bstein_dev_home_bstein_dev_home_backend
|
||||
svc_bstein_dev_home_chat_ai_gateway
|
||||
wl_bstein_dev_home_chat_ai_gateway
|
||||
end
|
||||
subgraph comms[comms]
|
||||
svc_comms_matrix_wellknown
|
||||
wl_comms_matrix_wellknown
|
||||
svc_comms_element_call
|
||||
wl_comms_element_call
|
||||
svc_comms_livekit_token_service
|
||||
wl_comms_livekit_token_service
|
||||
svc_comms_livekit
|
||||
wl_comms_livekit
|
||||
svc_comms_othrys_synapse_matrix_synapse
|
||||
svc_comms_matrix_guest_register
|
||||
wl_comms_matrix_guest_register
|
||||
svc_comms_matrix_authentication_service
|
||||
wl_comms_matrix_authentication_service
|
||||
end
|
||||
subgraph crypto[crypto]
|
||||
svc_crypto_monerod
|
||||
wl_crypto_monerod
|
||||
end
|
||||
subgraph finance[finance]
|
||||
svc_finance_actual_budget
|
||||
wl_finance_actual_budget
|
||||
svc_finance_firefly
|
||||
wl_finance_firefly
|
||||
end
|
||||
subgraph gitea[gitea]
|
||||
svc_gitea_gitea
|
||||
wl_gitea_gitea
|
||||
end
|
||||
subgraph health[health]
|
||||
svc_health_wger
|
||||
wl_health_wger
|
||||
end
|
||||
subgraph jellyfin[jellyfin]
|
||||
svc_jellyfin_pegasus
|
||||
wl_jellyfin_pegasus
|
||||
svc_jellyfin_jellyfin
|
||||
wl_jellyfin_jellyfin
|
||||
end
|
||||
subgraph jenkins[jenkins]
|
||||
svc_jenkins_jenkins
|
||||
wl_jenkins_jenkins
|
||||
end
|
||||
subgraph logging[logging]
|
||||
svc_logging_oauth2_proxy_logs
|
||||
wl_logging_oauth2_proxy_logs
|
||||
end
|
||||
subgraph longhorn_system[longhorn-system]
|
||||
svc_longhorn_system_oauth2_proxy_longhorn
|
||||
wl_longhorn_system_oauth2_proxy_longhorn
|
||||
end
|
||||
subgraph mailu_mailserver[mailu-mailserver]
|
||||
svc_mailu_mailserver_mailu_front
|
||||
end
|
||||
subgraph nextcloud[nextcloud]
|
||||
svc_nextcloud_nextcloud
|
||||
wl_nextcloud_nextcloud
|
||||
svc_nextcloud_collabora
|
||||
wl_nextcloud_collabora
|
||||
end
|
||||
subgraph outline[outline]
|
||||
svc_outline_outline
|
||||
wl_outline_outline
|
||||
end
|
||||
subgraph planka[planka]
|
||||
svc_planka_planka
|
||||
wl_planka_planka
|
||||
end
|
||||
subgraph sso[sso]
|
||||
svc_sso_oauth2_proxy
|
||||
wl_sso_oauth2_proxy
|
||||
svc_sso_keycloak
|
||||
wl_sso_keycloak
|
||||
end
|
||||
subgraph vault[vault]
|
||||
svc_vault_vault
|
||||
wl_vault_vault
|
||||
end
|
||||
subgraph vaultwarden[vaultwarden]
|
||||
svc_vaultwarden_vaultwarden_service
|
||||
wl_vaultwarden_vaultwarden
|
||||
end
|
||||
29
services/atlasbot/kustomization.yaml
Normal file
29
services/atlasbot/kustomization.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
# services/atlasbot/kustomization.yaml
#
# Kustomize entry point for the atlasbot service. Every resource listed below
# is placed in the `ai` namespace.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: ai
resources:
  - atlasbot-deployment.yaml
  - atlasbot-service.yaml
  - atlasbot-rbac.yaml
  - secretproviderclass.yaml
  - vault-sync-deployment.yaml
  - image.yaml
  - image-automation.yaml
images:
  - name: registry.bstein.dev/bstein/atlasbot
    # The trailing `$imagepolicy` marker must stay on the same line as the
    # tag: it names the image policy (`ai:atlasbot:tag`) whose result is
    # written here.
    newTag: 0.1.2-106 # {"$imagepolicy": "ai:atlasbot:tag"}
configMapGenerator:
  # Shell helper that exports credentials read from files as environment
  # variables.
  - name: atlasbot-vault-env
    files:
      - atlasbot_vault_env.sh=scripts/atlasbot_vault_env.sh
    options:
      # Keep the ConfigMap name stable (no content-hash suffix).
      disableNameSuffixHash: true
  # Knowledge base bundle: index, generated catalog files and the HTTP
  # routing diagram. No `options` are set here, so kustomize's default
  # name-suffix hashing applies to this ConfigMap.
  - name: atlas-kb
    files:
      - INDEX.md=knowledge/INDEX.md
      - atlas.json=knowledge/catalog/atlas.json
      - atlas-summary.json=knowledge/catalog/atlas-summary.json
      - metrics.json=knowledge/catalog/metrics.json
      - runbooks.json=knowledge/catalog/runbooks.json
      - atlas-http.mmd=knowledge/diagrams/atlas-http.mmd
|
||||
44
services/atlasbot/scripts/atlasbot_vault_env.sh
Normal file
44
services/atlasbot/scripts/atlasbot_vault_env.sh
Normal file
@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env sh
# atlasbot_vault_env.sh
#
# Reads credential files from ${vault_dir} and exports their contents as
# environment variables. Secret values are only ever read from files; none
# are embedded in this script.
#
# -e: abort on an unhandled command failure; -u: abort on an unset variable.
set -eu

# Directory that holds one file per secret.
vault_dir="/vault/secrets"
|
||||
|
||||
# read_secret NAME
#
# Print the contents of the required secret file ${vault_dir}/NAME on stdout
# with every carriage-return and line-feed byte removed.
#
# Globals:   vault_dir (read)
# Arguments: $1 - file name of the secret inside ${vault_dir}
# Outputs:   the stripped secret on stdout; a diagnostic naming the file
#            (never its contents) on stderr when it cannot be read
# Returns:   0 on success, non-zero when the file is missing or unreadable
read_secret() {
  if [ ! -r "${vault_dir}/$1" ]; then
    printf 'read_secret: cannot read secret file: %s\n' "${vault_dir}/$1" >&2
    return 1
  fi
  tr -d '\r\n' < "${vault_dir}/$1"
}
|
||||
|
||||
# read_optional NAME
#
# Like a required-secret read, but tolerant of absence: when
# ${vault_dir}/NAME is not a regular file, print nothing and succeed.
# Otherwise print the file contents with every carriage-return and
# line-feed byte removed.
#
# Globals:   vault_dir (read)
# Arguments: $1 - file name of the secret inside ${vault_dir}
# Outputs:   the stripped secret on stdout, or nothing when the file is absent
# Returns:   0 when the file is absent; otherwise the status of reading it
read_optional() {
  # Absent optional secret: empty output, success.
  [ -f "${vault_dir}/$1" ] || return 0
  tr -d '\r\n' < "${vault_dir}/$1"
}
|
||||
|
||||
# Export every credential as an environment variable.
#
# Each value is assigned first and exported in a separate statement. Writing
# `export VAR="$(cmd)"` would report the status of `export` (always 0) and so
# hide a failing read; with the split form, `set -e` stops the script when a
# required secret cannot be read instead of exporting an empty value.
# NOTE(review): this means every name passed to read_secret below must exist
# in ${vault_dir} wherever this script runs — confirm against the mounted
# secrets before rollout.

# TURN: one secret, exported under two variable names.
TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)"
export TURN_STATIC_AUTH_SECRET
export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}"

# LiveKit: one secret, exported under two variable names.
LIVEKIT_API_SECRET="$(read_secret livekit-primary)"
export LIVEKIT_API_SECRET
export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}"

# Bot passwords. BOT_PASS is required; the quick/smart/genius variants are
# optional.
BOT_PASS="$(read_secret bot-pass)"
export BOT_PASS
BOT_PASS_QUICK="$(read_optional bot-quick-pass)"
export BOT_PASS_QUICK
BOT_PASS_SMART="$(read_optional bot-smart-pass)"
export BOT_PASS_SMART
BOT_PASS_GENIUS="$(read_optional bot-genius-pass)"
export BOT_PASS_GENIUS
# Fallback chain: SMART falls back to BOT_PASS, GENIUS falls back to SMART.
# BOT_PASS_QUICK has no fallback and stays empty when its file is absent.
if [ -z "${BOT_PASS_SMART}" ]; then
  export BOT_PASS_SMART="${BOT_PASS}"
fi
if [ -z "${BOT_PASS_GENIUS}" ]; then
  export BOT_PASS_GENIUS="${BOT_PASS_SMART}"
fi
SEEDER_PASS="$(read_secret seeder-pass)"
export SEEDER_PASS

# Chat API keys.
CHAT_API_KEY="$(read_secret chat-matrix)"
export CHAT_API_KEY
CHAT_API_HOMEPAGE="$(read_secret chat-homepage)"
export CHAT_API_HOMEPAGE

# Exported as a file path, not as the secret's contents.
export MAS_ADMIN_CLIENT_SECRET_FILE="${vault_dir}/mas-admin-secret"
PGPASSWORD="$(read_secret synapse-db-pass)"
export PGPASSWORD

MAS_DB_PASSWORD="$(read_secret mas-db-pass)"
export MAS_DB_PASSWORD
MATRIX_SHARED_SECRET="$(read_secret mas-matrix-shared)"
export MATRIX_SHARED_SECRET
KEYCLOAK_CLIENT_SECRET="$(read_secret mas-kc-secret)"
export KEYCLOAK_CLIENT_SECRET
|
||||
@ -1,14 +1,14 @@
|
||||
# services/typhon/secretproviderclass.yaml
|
||||
# services/atlasbot/secretproviderclass.yaml
|
||||
apiVersion: secrets-store.csi.x-k8s.io/v1
|
||||
kind: SecretProviderClass
|
||||
metadata:
|
||||
name: typhon-vault
|
||||
namespace: climate
|
||||
name: atlasbot-vault
|
||||
namespace: ai
|
||||
spec:
|
||||
provider: vault
|
||||
parameters:
|
||||
vaultAddress: "http://vault.vault.svc.cluster.local:8200"
|
||||
roleName: "typhon"
|
||||
roleName: "ai"
|
||||
objects: |
|
||||
- objectName: "harbor-pull__dockerconfigjson"
|
||||
secretPath: "kv/data/atlas/shared/harbor-pull"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user