Compare commits
105 Commits
codex/aria
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18d518b47a | ||
|
|
e22a47b65e | ||
|
|
a3b140e090 | ||
|
|
533e284752 | ||
|
|
54ffc65165 | ||
|
|
831a0fda1b | ||
|
|
e36fc5229d | ||
|
|
bdb9b47291 | ||
|
|
9dc76b10f4 | ||
|
|
02a4b852ad | ||
|
|
3649262316 | ||
|
|
84cc4e6236 | ||
|
|
b21b182199 | ||
|
|
e9f48269d3 | ||
|
|
969ec81fd9 | ||
|
|
1b0137d984 | ||
|
|
949ef2c6ad | ||
|
|
f10540b4a9 | ||
|
|
21407bdf39 | ||
|
|
369760c841 | ||
|
|
98bf1044b5 | ||
|
|
91648526ef | ||
|
|
ec232d0079 | ||
|
|
19372c8a9a | ||
|
|
604b198534 | ||
|
|
4bbb50f5ad | ||
|
|
f9910c4281 | ||
|
|
8f349e88b2 | ||
|
|
70a08768b4 | ||
|
|
77fc16a7cb | ||
|
|
c41ff7ab3b | ||
|
|
186f7927ba | ||
|
|
39d671c98d | ||
|
|
a8b1e5ac7c | ||
|
|
102eaf8d92 | ||
|
|
236f2a5318 | ||
|
|
71c1b9b7bf | ||
|
|
13594eba57 | ||
|
|
1434fbedf1 | ||
|
|
86e9a2d82b | ||
|
|
a8a9f04c44 | ||
|
|
8fec20e816 | ||
|
|
faff23408a | ||
|
|
160dbd5f3d | ||
|
|
1b2d30e67e | ||
|
|
e6c7b1ab9f | ||
|
|
d3ae03f935 | ||
|
|
9965983322 | ||
|
|
415da50fa1 | ||
|
|
24c3d842c1 | ||
|
|
4fec10d1ee | ||
|
|
dc4e76c90d | ||
|
|
bca3d87743 | ||
|
|
03f9118f21 | ||
|
|
40c1a3652b | ||
|
|
01ccdd3fcb | ||
|
|
4966cc7f35 | ||
|
|
2a14d28713 | ||
|
|
0c94ee93ce | ||
|
|
b73e678bfc | ||
|
|
6b6b9677be | ||
|
|
a17654819c | ||
|
|
9a28ea0086 | ||
|
|
ed1fc729d7 | ||
|
|
c07570494a | ||
|
|
c0ac1e23a7 | ||
|
|
cbe774acfd | ||
|
|
f0e161ba8b | ||
|
|
152c19665e | ||
|
|
0fa6138612 | ||
|
|
18a6471c08 | ||
|
|
c11996d860 | ||
|
|
7d9b649a43 | ||
|
|
b5d60fb3be | ||
|
|
d999b4ff8c | ||
|
|
2477ca3899 | ||
|
|
f0baa619dc | ||
|
|
63a64661ec | ||
|
|
7f284007eb | ||
|
|
67db7b8438 | ||
|
|
f95c51e7f5 | ||
|
|
b3c86752e3 | ||
|
|
20fd0a9f38 | ||
|
|
7e281e6548 | ||
|
|
b9951da1ae | ||
|
|
6e1416d1ae | ||
|
|
2eadf55557 | ||
|
|
3c157b9523 | ||
|
|
c64aca3869 | ||
|
|
783b089af2 | ||
|
|
eb05d0bd50 | ||
| 6e2d5ea6ed | |||
| 05b788c118 | |||
| 26cc9333c7 | |||
| a57577e2a5 | |||
| bbb958b7c5 | |||
| 6f4c141d97 | |||
| eb931e8d46 | |||
| de2523c313 | |||
| 1094323f1a | |||
| 27788d307f | |||
| 4cc2f0c355 | |||
| 2ff3686700 | |||
| ad99a83a98 | |||
| 1dcc37e8a7 |
@ -5,8 +5,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY ariadne /app/ariadne
|
||||
COPY --chown=ariadne:ariadne ariadne /app/ariadne
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
USER ariadne
|
||||
CMD ["uvicorn", "ariadne.app:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
|
||||
@ -6,4 +6,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements.txt /app/requirements.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements.txt && \
|
||||
addgroup --system ariadne && \
|
||||
adduser --system --ingroup ariadne --home /app ariadne && \
|
||||
chown -R ariadne:ariadne /app
|
||||
|
||||
USER ariadne
|
||||
|
||||
@ -1,6 +1,10 @@
|
||||
FROM registry.bstein.dev/bstein/ariadne-base:py312
|
||||
|
||||
USER root
|
||||
WORKDIR /app
|
||||
|
||||
COPY requirements-dev.txt /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements-dev.txt
|
||||
RUN pip install --no-cache-dir -r /app/requirements-dev.txt && \
|
||||
chown -R ariadne:ariadne /app
|
||||
|
||||
USER ariadne
|
||||
|
||||
267
Jenkinsfile
vendored
267
Jenkinsfile
vendored
@ -1,7 +1,6 @@
|
||||
pipeline {
|
||||
agent {
|
||||
kubernetes {
|
||||
label 'ariadne'
|
||||
defaultContainer 'builder'
|
||||
yaml """
|
||||
apiVersion: v1
|
||||
@ -17,7 +16,7 @@ spec:
|
||||
- name: harbor-robot-pipeline
|
||||
containers:
|
||||
- name: dind
|
||||
image: docker:27-dind
|
||||
image: registry.bstein.dev/bstein/docker:27-dind
|
||||
securityContext:
|
||||
privileged: true
|
||||
env:
|
||||
@ -27,11 +26,12 @@ spec:
|
||||
- --mtu=1400
|
||||
- --host=unix:///var/run/docker.sock
|
||||
- --host=tcp://0.0.0.0:2375
|
||||
- --tls=false
|
||||
volumeMounts:
|
||||
- name: dind-storage
|
||||
mountPath: /var/lib/docker
|
||||
- name: builder
|
||||
image: docker:27
|
||||
image: registry.bstein.dev/bstein/docker:27
|
||||
command: ["cat"]
|
||||
tty: true
|
||||
env:
|
||||
@ -49,7 +49,14 @@ spec:
|
||||
- name: harbor-config
|
||||
mountPath: /docker-config
|
||||
- name: tester
|
||||
image: python:3.12-slim
|
||||
image: registry.bstein.dev/bstein/python:3.12-slim
|
||||
command: ["cat"]
|
||||
tty: true
|
||||
volumeMounts:
|
||||
- name: workspace-volume
|
||||
mountPath: /home/jenkins/agent
|
||||
- name: quality-tools
|
||||
image: registry.bstein.dev/bstein/quality-tools:sonar8.0.1-trivy0.70.0-db20260422-arm64
|
||||
command: ["cat"]
|
||||
tty: true
|
||||
volumeMounts:
|
||||
@ -76,14 +83,21 @@ spec:
|
||||
IMAGE = "${REGISTRY}/ariadne"
|
||||
VERSION_TAG = 'dev'
|
||||
SEMVER = 'dev'
|
||||
COVERAGE_MIN = '99'
|
||||
COVERAGE_MIN = '95'
|
||||
COVERAGE_JSON = 'build/coverage.json'
|
||||
JUNIT_XML = 'build/junit.xml'
|
||||
SUITE_NAME = 'ariadne'
|
||||
PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
|
||||
SONARQUBE_HOST_URL = 'http://sonarqube.quality.svc.cluster.local:9000'
|
||||
SONARQUBE_PROJECT_KEY = 'ariadne'
|
||||
SONARQUBE_TOKEN = credentials('sonarqube-token')
|
||||
QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
|
||||
QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
|
||||
BUILDKIT_IMAGE = 'registry.bstein.dev/bstein/buildkit:buildx-stable-1'
|
||||
}
|
||||
options {
|
||||
disableConcurrentBuilds()
|
||||
buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
|
||||
}
|
||||
triggers {
|
||||
pollSCM('H/2 * * * *')
|
||||
@ -95,22 +109,171 @@ spec:
|
||||
}
|
||||
}
|
||||
|
||||
stage('Unit tests') {
|
||||
stage('Collect SonarQube evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
args=(
|
||||
"-Dsonar.host.url=${SONARQUBE_HOST_URL}"
|
||||
"-Dsonar.login=${SONARQUBE_TOKEN}"
|
||||
"-Dsonar.projectKey=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.projectName=${SONARQUBE_PROJECT_KEY}"
|
||||
"-Dsonar.sources=."
|
||||
"-Dsonar.exclusions=**/.git/**,**/build/**,**/dist/**,**/node_modules/**,**/.venv/**,**/__pycache__/**,**/coverage/**,**/test-results/**,**/playwright-report/**"
|
||||
"-Dsonar.test.inclusions=**/tests/**,**/testing/**,**/*_test.go,**/*.test.ts,**/*.test.tsx,**/*.spec.ts,**/*.spec.tsx"
|
||||
)
|
||||
[ -f build/coverage.xml ] && args+=("-Dsonar.python.coverage.reportPaths=build/coverage.xml")
|
||||
set +e
|
||||
sonar-scanner "${args[@]}" | tee build/sonar-scanner.log
|
||||
rc=${PIPESTATUS[0]}
|
||||
set -e
|
||||
printf '%s\n' "${rc}" > build/sonarqube-analysis.rc
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import base64
|
||||
import json
|
||||
import os
|
||||
import urllib.parse
|
||||
import urllib.request
|
||||
|
||||
host = os.getenv('SONARQUBE_HOST_URL', '').strip().rstrip('/')
|
||||
project_key = os.getenv('SONARQUBE_PROJECT_KEY', '').strip()
|
||||
token = os.getenv('SONARQUBE_TOKEN', '').strip()
|
||||
report_path = os.getenv('QUALITY_GATE_SONARQUBE_REPORT', 'build/sonarqube-quality-gate.json')
|
||||
payload = {"status": "ERROR", "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"}
|
||||
if host and project_key:
|
||||
query = urllib.parse.urlencode({"projectKey": project_key})
|
||||
request = urllib.request.Request(f"{host}/api/qualitygates/project_status?{query}", method="GET")
|
||||
if token:
|
||||
encoded = base64.b64encode(f"{token}:".encode("utf-8")).decode("utf-8")
|
||||
request.add_header("Authorization", f"Basic {encoded}")
|
||||
try:
|
||||
with urllib.request.urlopen(request, timeout=12) as response:
|
||||
payload = json.loads(response.read().decode("utf-8"))
|
||||
except Exception as exc: # noqa: BLE001
|
||||
payload = {"status": "ERROR", "error": str(exc)}
|
||||
with open(report_path, "w", encoding="utf-8") as handle:
|
||||
json.dump(payload, handle, indent=2, sort_keys=True)
|
||||
handle.write("\\n")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Collect Supply Chain evidence') {
|
||||
steps {
|
||||
container('quality-tools') {
|
||||
sh '''#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
set +e
|
||||
trivy fs --cache-dir "${TRIVY_CACHE_DIR}" --skip-db-update --timeout 5m --no-progress --format json --output build/trivy-fs.json --scanners vuln,secret,misconfig --severity HIGH,CRITICAL .
|
||||
trivy_rc=$?
|
||||
set -e
|
||||
if [ ! -s build/trivy-fs.json ]; then
|
||||
cat > build/ironbank-compliance.json <<EOF
|
||||
{"status":"failed","compliant":false,"scanner":"trivy","scan_type":"filesystem","error":"trivy did not produce JSON output","trivy_rc":${trivy_rc}}
|
||||
EOF
|
||||
exit 0
|
||||
fi
|
||||
critical="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="CRITICAL")] | length' build/trivy-fs.json)"
|
||||
high="$(jq '[.Results[]? | .Vulnerabilities[]? | select(.Severity=="HIGH")] | length' build/trivy-fs.json)"
|
||||
secrets="$(jq '[.Results[]? | .Secrets[]?] | length' build/trivy-fs.json)"
|
||||
misconfigs="$(jq '[.Results[]? | .Misconfigurations[]? | select(.Status=="FAIL" and (.Severity=="CRITICAL" or .Severity=="HIGH"))] | length' build/trivy-fs.json)"
|
||||
status=ok
|
||||
compliant=true
|
||||
if [ "${critical}" -gt 0 ] || [ "${secrets}" -gt 0 ] || [ "${misconfigs}" -gt 0 ]; then
|
||||
status=failed
|
||||
compliant=false
|
||||
fi
|
||||
jq -n --arg status "${status}" --argjson compliant "${compliant}" --argjson critical "${critical}" --argjson high "${high}" --argjson secrets "${secrets}" --argjson misconfigs "${misconfigs}" --argjson trivy_rc "${trivy_rc}" \
|
||||
'{status:$status, compliant:$compliant, category:"artifact_security", scan_type:"filesystem", scanner:"trivy", critical_vulnerabilities:$critical, high_vulnerabilities:$high, secrets:$secrets, high_or_critical_misconfigurations:$misconfigs, trivy_rc:$trivy_rc, high_vulnerability_policy:"observe"}' > build/ironbank-compliance.json
|
||||
'''
|
||||
}
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
report_path = Path(os.getenv('QUALITY_GATE_IRONBANK_REPORT', 'build/ironbank-compliance.json'))
|
||||
if report_path.exists():
|
||||
raise SystemExit(0)
|
||||
status = os.getenv('IRONBANK_COMPLIANCE_STATUS', '').strip()
|
||||
compliant = os.getenv('IRONBANK_COMPLIANT', '').strip().lower()
|
||||
payload = {"status": status or "unknown", "compliant": compliant in {"1", "true", "yes", "on"} if compliant else None}
|
||||
payload = {k: v for k, v in payload.items() if v is not None}
|
||||
if "status" not in payload:
|
||||
payload["status"] = "unknown"
|
||||
payload["note"] = "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
report_path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\\n", encoding="utf-8")
|
||||
PY
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Run quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh(script: '''
|
||||
set -euo pipefail
|
||||
mkdir -p build
|
||||
set +e
|
||||
python -m pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
|
||||
python -m ruff check ariadne --select PLR
|
||||
python scripts/check_file_sizes.py --roots ariadne scripts tests --max-lines 500 --waivers scripts/loc_hygiene_waivers.tsv
|
||||
python -m slipcover \
|
||||
--json \
|
||||
--out "${COVERAGE_JSON}" \
|
||||
--source ariadne \
|
||||
--fail-under "${COVERAGE_MIN}" \
|
||||
-m pytest -ra -vv --durations=20 --junitxml "${JUNIT_XML}"
|
||||
python -c "import json; payload=json.load(open('build/coverage.json', encoding='utf-8')); percent=(payload.get('summary') or {}).get('percent_covered'); print(f'Coverage summary: {percent:.2f}%' if percent is not None else 'Coverage summary unavailable')"
|
||||
install_rc=$?
|
||||
docs_rc=1
|
||||
lint_rc=1
|
||||
loc_rc=1
|
||||
tests_rc=1
|
||||
coverage_contract_rc=0
|
||||
gate_rc=1
|
||||
if [ "${install_rc}" -eq 0 ]; then
|
||||
python scripts/check_docstrings.py --root ariadne
|
||||
docs_rc=$?
|
||||
python -m ruff check ariadne scripts --select PLR
|
||||
lint_rc=$?
|
||||
python scripts/check_file_sizes.py --roots ariadne scripts tests --max-lines 500 --waivers ci/loc_hygiene_waivers.tsv
|
||||
loc_rc=$?
|
||||
python -m slipcover \
|
||||
--json \
|
||||
--out "${COVERAGE_JSON}" \
|
||||
--source ariadne \
|
||||
--fail-under "${COVERAGE_MIN}" \
|
||||
-m pytest -ra -vv --durations=20 --junitxml "${JUNIT_XML}"
|
||||
tests_rc=$?
|
||||
python -c "import json; payload=json.load(open('build/coverage.json', encoding='utf-8')); percent=(payload.get('summary') or {}).get('percent_covered'); print(f'Coverage summary: {percent:.2f}%' if percent is not None else 'Coverage summary unavailable')" || true
|
||||
if [ -f "${COVERAGE_JSON}" ] && [ -f scripts/check_coverage_contract.py ]; then
|
||||
python scripts/check_coverage_contract.py "${COVERAGE_JSON}" --source-root ariadne --threshold "${COVERAGE_MIN}"
|
||||
coverage_contract_rc=$?
|
||||
else
|
||||
echo "coverage contract check skipped: checker or coverage report missing"
|
||||
fi
|
||||
fi
|
||||
printf '%s\n' "${docs_rc}" > build/docs-naming.rc
|
||||
|
||||
if [ "${install_rc}" -eq 0 ]; then
|
||||
gate_rc=0
|
||||
[ "${docs_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${lint_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${loc_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${tests_rc}" -eq 0 ] || gate_rc=1
|
||||
[ "${coverage_contract_rc}" -eq 0 ] || gate_rc=1
|
||||
fi
|
||||
set -e
|
||||
printf '%s\n' "${gate_rc}" > build/quality-gate.rc
|
||||
'''.stripIndent())
|
||||
}
|
||||
}
|
||||
@ -121,7 +284,18 @@ python -c "import json; payload=json.load(open('build/coverage.json', encoding='
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
python scripts/publish_test_metrics.py
|
||||
python scripts/publish_test_metrics.py || true
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Enforce quality gate') {
|
||||
steps {
|
||||
container('tester') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
|
||||
'''
|
||||
}
|
||||
}
|
||||
@ -177,21 +351,76 @@ python -c "import json; payload=json.load(open('build/coverage.json', encoding='
|
||||
fi
|
||||
BUILDER_NAME="ariadne-${BUILD_NUMBER}"
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
docker buildx create --name "${BUILDER_NAME}" --driver docker-container --bootstrap --use
|
||||
attempt=1
|
||||
while [ "${attempt}" -le 3 ]; do
|
||||
if docker buildx create --name "${BUILDER_NAME}" --driver docker-container --driver-opt "image=${BUILDKIT_IMAGE}" --bootstrap --use; then
|
||||
break
|
||||
fi
|
||||
docker buildx rm "${BUILDER_NAME}" >/dev/null 2>&1 || true
|
||||
echo "buildx bootstrap attempt ${attempt}/3 failed; retrying after registry/network backoff" >&2
|
||||
sleep $((attempt * 15))
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
if [ "${attempt}" -gt 3 ]; then
|
||||
echo "buildx bootstrap failed after retries" >&2
|
||||
exit 1
|
||||
fi
|
||||
docker buildx inspect "${BUILDER_NAME}" --bootstrap
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Build & push base image') {
|
||||
steps {
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
retry_buildx() {
|
||||
attempt=1
|
||||
while [ "${attempt}" -le 3 ]; do
|
||||
if docker buildx build "$@"; then
|
||||
return 0
|
||||
fi
|
||||
echo "buildx attempt ${attempt}/3 failed; retrying after registry/network backoff" >&2
|
||||
sleep $((attempt * 15))
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
return 1
|
||||
}
|
||||
retry_buildx \
|
||||
--platform linux/arm64 \
|
||||
--network host \
|
||||
--file Dockerfile.base \
|
||||
--tag "${REGISTRY}/ariadne-base:py312" \
|
||||
--push \
|
||||
.
|
||||
'''
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage('Build & push image') {
|
||||
steps {
|
||||
container('builder') {
|
||||
sh '''
|
||||
set -euo pipefail
|
||||
VERSION_TAG="$(cut -d= -f2 build.env)"
|
||||
docker buildx build \
|
||||
retry_buildx() {
|
||||
attempt=1
|
||||
while [ "${attempt}" -le 3 ]; do
|
||||
if docker buildx build "$@"; then
|
||||
return 0
|
||||
fi
|
||||
echo "buildx attempt ${attempt}/3 failed; retrying after registry/network backoff" >&2
|
||||
sleep $((attempt * 15))
|
||||
attempt=$((attempt + 1))
|
||||
done
|
||||
return 1
|
||||
}
|
||||
retry_buildx \
|
||||
--platform linux/arm64 \
|
||||
--network host \
|
||||
--tag "${IMAGE}:${VERSION_TAG}" \
|
||||
--tag "${IMAGE}:latest" \
|
||||
--push \
|
||||
@ -213,7 +442,7 @@ python -c "import json; payload=json.load(open('build/coverage.json', encoding='
|
||||
}
|
||||
}
|
||||
}
|
||||
archiveArtifacts artifacts: 'build/junit.xml,build/coverage.json', allowEmptyArchive: true, fingerprint: true
|
||||
archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
|
||||
script {
|
||||
def props = fileExists('build.env') ? readProperties(file: 'build.env') : [:]
|
||||
echo "Build complete for ${props['SEMVER'] ?: env.VERSION_TAG}"
|
||||
|
||||
840
ariadne/app.py
840
ariadne/app.py
@ -1,69 +1,49 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
import threading
|
||||
from typing import Any, Callable
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from fastapi import Body, Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse, Response
|
||||
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest
|
||||
|
||||
from .app_account_routes import _register_account_routes
|
||||
from .app_admin_routes import _register_admin_routes
|
||||
from .auth.keycloak import AuthContext, authenticator
|
||||
from .db.database import Database, DatabaseConfig
|
||||
from .db.storage import Storage, TaskRunRecord
|
||||
from .db.storage import Storage
|
||||
from .manager.provisioning import ProvisioningManager
|
||||
from .metrics.metrics import record_task_run
|
||||
from .scheduler.cron import CronScheduler
|
||||
from .services.cluster_state import run_cluster_state
|
||||
from .services.comms import comms
|
||||
from .services.firefly import firefly
|
||||
from .services.image_sweeper import image_sweeper
|
||||
from .services.jenkins_build_weather import collect_jenkins_build_weather
|
||||
from .services.jenkins_workspace_cleanup import cleanup_jenkins_workspace_storage
|
||||
from .services.keycloak_admin import keycloak_admin
|
||||
from .services.keycloak_profile import run_profile_sync
|
||||
from .services.mailu import mailu
|
||||
from .services.mailu_events import mailu_events
|
||||
from .services.nextcloud import nextcloud
|
||||
from .services.image_sweeper import image_sweeper
|
||||
from .services.metis import metis
|
||||
from .services.metis_token_sync import metis_token_sync
|
||||
from .services.nextcloud import nextcloud
|
||||
from .services.opensearch_prune import prune_indices
|
||||
from .services.platform_quality_probe import platform_quality_probe
|
||||
from .services.pod_cleaner import clean_finished_pods
|
||||
from .services.vaultwarden_sync import run_vaultwarden_sync
|
||||
from .services.vault import vault
|
||||
from .services.vaultwarden_sync import run_vaultwarden_sync
|
||||
from .services.wger import wger
|
||||
from .settings import settings
|
||||
from .utils.errors import safe_error_detail
|
||||
from .utils.http import extract_bearer_token
|
||||
from .utils.logging import LogConfig, configure_logging, get_logger, task_context
|
||||
from .utils.logging import LogConfig, configure_logging, get_logger
|
||||
from .utils.passwords import random_password
|
||||
|
||||
|
||||
configure_logging(LogConfig(level=settings.log_level))
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountTaskContext:
|
||||
task_name: str
|
||||
username: str
|
||||
started: datetime
|
||||
extra: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PasswordResetRequest:
|
||||
task_name: str
|
||||
service_label: str
|
||||
username: str
|
||||
mailu_email: str
|
||||
password: str
|
||||
sync_fn: Callable[[], dict[str, Any]]
|
||||
password_attr: str
|
||||
updated_attr: str
|
||||
error_hint: str
|
||||
|
||||
portal_db = Database(
|
||||
settings.portal_database_url,
|
||||
DatabaseConfig(
|
||||
@ -91,6 +71,7 @@ ariadne_db = Database(
|
||||
storage = Storage(ariadne_db, portal_db)
|
||||
provisioning = ProvisioningManager(portal_db, storage)
|
||||
scheduler = CronScheduler(storage, settings.schedule_tick_sec)
|
||||
app = FastAPI(title=settings.app_name)
|
||||
|
||||
|
||||
def _record_event(event_type: str, detail: dict[str, Any] | str | None) -> None:
|
||||
@ -109,9 +90,6 @@ def _parse_event_detail(detail: str | None) -> Any:
|
||||
return detail
|
||||
|
||||
|
||||
app = FastAPI(title=settings.app_name)
|
||||
|
||||
|
||||
def _require_auth(request: Request) -> AuthContext:
|
||||
token = extract_bearer_token(request)
|
||||
if not token:
|
||||
@ -167,92 +145,8 @@ def _allowed_flag_groups() -> list[str]:
|
||||
return settings.allowed_flag_groups
|
||||
|
||||
|
||||
def _resolve_mailu_email(username: str) -> str:
|
||||
mailu_email = f"{username}@{settings.mailu_domain}"
|
||||
try:
|
||||
user = keycloak_admin.find_user(username) or {}
|
||||
attrs = user.get("attributes") if isinstance(user, dict) else None
|
||||
if isinstance(attrs, dict):
|
||||
raw_mailu = attrs.get("mailu_email")
|
||||
if isinstance(raw_mailu, list) and raw_mailu:
|
||||
return str(raw_mailu[0])
|
||||
if isinstance(raw_mailu, str) and raw_mailu:
|
||||
return raw_mailu
|
||||
except Exception:
|
||||
return mailu_email
|
||||
return mailu_email
|
||||
|
||||
|
||||
def _record_account_task(ctx: AccountTaskContext, status: str, error_detail: str) -> None:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - ctx.started).total_seconds()
|
||||
record_task_run(ctx.task_name, status, duration_sec)
|
||||
try:
|
||||
storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=None,
|
||||
task=ctx.task_name,
|
||||
status=status,
|
||||
detail=error_detail or None,
|
||||
started_at=ctx.started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
detail = {"username": ctx.username, "status": status, "error": error_detail}
|
||||
if ctx.extra:
|
||||
detail.update(ctx.extra)
|
||||
_record_event(ctx.task_name, detail)
|
||||
|
||||
|
||||
def _run_password_reset(request: PasswordResetRequest) -> JSONResponse:
|
||||
started = datetime.now(timezone.utc)
|
||||
task_ctx = AccountTaskContext(
|
||||
task_name=request.task_name,
|
||||
username=request.username,
|
||||
started=started,
|
||||
extra={"mailu_email": request.mailu_email},
|
||||
)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
logger.info(
|
||||
f"{request.service_label} password reset requested",
|
||||
extra={"event": request.task_name, "username": request.username},
|
||||
)
|
||||
try:
|
||||
result = request.sync_fn()
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
raise RuntimeError(f"{request.service_label} sync {status_val}")
|
||||
|
||||
keycloak_admin.set_user_attribute(
|
||||
request.username,
|
||||
request.password_attr,
|
||||
request.password,
|
||||
)
|
||||
keycloak_admin.set_user_attribute(
|
||||
request.username,
|
||||
request.updated_attr,
|
||||
datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"{request.service_label} password reset completed",
|
||||
extra={"event": request.task_name, "username": request.username},
|
||||
)
|
||||
return JSONResponse({"status": "ok", "password": request.password})
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, request.error_hint)
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
_record_account_task(task_ctx, status, error_detail)
|
||||
def _app_module() -> Any:
|
||||
return sys.modules[__name__]
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
@ -260,108 +154,34 @@ def _startup() -> None:
|
||||
provisioning.start()
|
||||
|
||||
scheduler.add_task("schedule.mailu_sync", settings.mailu_sync_cron, lambda: mailu.sync("ariadne_schedule"))
|
||||
scheduler.add_task(
|
||||
"schedule.nextcloud_sync",
|
||||
settings.nextcloud_sync_cron,
|
||||
lambda: nextcloud.sync_mail(wait=False),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.nextcloud_cron",
|
||||
settings.nextcloud_cron,
|
||||
lambda: nextcloud.run_cron(),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.nextcloud_maintenance",
|
||||
settings.nextcloud_maintenance_cron,
|
||||
lambda: nextcloud.run_maintenance(),
|
||||
)
|
||||
scheduler.add_task("schedule.nextcloud_sync", settings.nextcloud_sync_cron, lambda: nextcloud.sync_mail(wait=False))
|
||||
scheduler.add_task("schedule.nextcloud_cron", settings.nextcloud_cron, lambda: nextcloud.run_cron())
|
||||
scheduler.add_task("schedule.nextcloud_maintenance", settings.nextcloud_maintenance_cron, lambda: nextcloud.run_maintenance())
|
||||
scheduler.add_task("schedule.vaultwarden_sync", settings.vaultwarden_sync_cron, run_vaultwarden_sync)
|
||||
scheduler.add_task(
|
||||
"schedule.keycloak_profile",
|
||||
settings.keycloak_profile_cron,
|
||||
run_profile_sync,
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.wger_user_sync",
|
||||
settings.wger_user_sync_cron,
|
||||
lambda: wger.sync_users(),
|
||||
)
|
||||
scheduler.add_task("schedule.keycloak_profile", settings.keycloak_profile_cron, run_profile_sync)
|
||||
scheduler.add_task("schedule.wger_user_sync", settings.wger_user_sync_cron, lambda: wger.sync_users())
|
||||
scheduler.add_task("schedule.wger_admin", settings.wger_admin_cron, lambda: wger.ensure_admin(wait=False))
|
||||
scheduler.add_task(
|
||||
"schedule.firefly_user_sync",
|
||||
settings.firefly_user_sync_cron,
|
||||
lambda: firefly.sync_users(),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.firefly_cron",
|
||||
settings.firefly_cron,
|
||||
lambda: firefly.run_cron(),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.pod_cleaner",
|
||||
settings.pod_cleaner_cron,
|
||||
clean_finished_pods,
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.opensearch_prune",
|
||||
settings.opensearch_prune_cron,
|
||||
prune_indices,
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.image_sweeper",
|
||||
settings.image_sweeper_cron,
|
||||
lambda: image_sweeper.run(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.metis_sentinel_watch",
|
||||
settings.metis_sentinel_watch_cron,
|
||||
lambda: metis.watch_sentinel(),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.metis_k3s_token_sync",
|
||||
settings.metis_k3s_token_sync_cron,
|
||||
lambda: metis_token_sync.run(wait=True),
|
||||
)
|
||||
scheduler.add_task("schedule.firefly_user_sync", settings.firefly_user_sync_cron, lambda: firefly.sync_users())
|
||||
scheduler.add_task("schedule.firefly_cron", settings.firefly_cron, lambda: firefly.run_cron())
|
||||
scheduler.add_task("schedule.pod_cleaner", settings.pod_cleaner_cron, clean_finished_pods)
|
||||
scheduler.add_task("schedule.opensearch_prune", settings.opensearch_prune_cron, prune_indices)
|
||||
scheduler.add_task("schedule.image_sweeper", settings.image_sweeper_cron, lambda: image_sweeper.run(wait=True))
|
||||
scheduler.add_task("schedule.metis_sentinel_watch", settings.metis_sentinel_watch_cron, lambda: metis.watch_sentinel())
|
||||
scheduler.add_task("schedule.metis_k3s_token_sync", settings.metis_k3s_token_sync_cron, lambda: metis_token_sync.run(wait=True))
|
||||
scheduler.add_task(
|
||||
"schedule.platform_quality_suite_probe",
|
||||
settings.platform_quality_suite_probe_cron,
|
||||
lambda: platform_quality_probe.run(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.vault_k8s_auth",
|
||||
settings.vault_k8s_auth_cron,
|
||||
lambda: vault.sync_k8s_auth(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.vault_oidc",
|
||||
settings.vault_oidc_cron,
|
||||
lambda: vault.sync_oidc(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.comms_guest_name",
|
||||
settings.comms_guest_name_cron,
|
||||
lambda: comms.run_guest_name_randomizer(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.comms_pin_invite",
|
||||
settings.comms_pin_invite_cron,
|
||||
lambda: comms.run_pin_invite(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.comms_reset_room",
|
||||
settings.comms_reset_room_cron,
|
||||
lambda: comms.run_reset_room(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.comms_seed_room",
|
||||
settings.comms_seed_room_cron,
|
||||
lambda: comms.run_seed_room(wait=True),
|
||||
)
|
||||
scheduler.add_task(
|
||||
"schedule.cluster_state",
|
||||
settings.cluster_state_cron,
|
||||
lambda: run_cluster_state(storage),
|
||||
)
|
||||
scheduler.add_task("schedule.jenkins_build_weather", settings.jenkins_build_weather_cron, collect_jenkins_build_weather)
|
||||
scheduler.add_task("schedule.jenkins_workspace_cleanup", settings.jenkins_workspace_cleanup_cron, cleanup_jenkins_workspace_storage)
|
||||
scheduler.add_task("schedule.vault_k8s_auth", settings.vault_k8s_auth_cron, lambda: vault.sync_k8s_auth(wait=True))
|
||||
scheduler.add_task("schedule.vault_oidc", settings.vault_oidc_cron, lambda: vault.sync_oidc(wait=True))
|
||||
scheduler.add_task("schedule.comms_guest_name", settings.comms_guest_name_cron, lambda: comms.run_guest_name_randomizer(wait=True))
|
||||
scheduler.add_task("schedule.comms_pin_invite", settings.comms_pin_invite_cron, lambda: comms.run_pin_invite(wait=True))
|
||||
scheduler.add_task("schedule.comms_reset_room", settings.comms_reset_room_cron, lambda: comms.run_reset_room(wait=True))
|
||||
scheduler.add_task("schedule.comms_seed_room", settings.comms_seed_room_cron, lambda: comms.run_seed_room(wait=True))
|
||||
scheduler.add_task("schedule.cluster_state", settings.cluster_state_cron, lambda: run_cluster_state(storage))
|
||||
scheduler.start()
|
||||
logger.info(
|
||||
"ariadne started",
|
||||
@ -382,6 +202,11 @@ def _startup() -> None:
|
||||
"metis_sentinel_watch_cron": settings.metis_sentinel_watch_cron,
|
||||
"metis_k3s_token_sync_cron": settings.metis_k3s_token_sync_cron,
|
||||
"platform_quality_suite_probe_cron": settings.platform_quality_suite_probe_cron,
|
||||
"jenkins_build_weather_cron": settings.jenkins_build_weather_cron,
|
||||
"jenkins_base_url": settings.jenkins_base_url,
|
||||
"jenkins_workspace_cleanup_cron": settings.jenkins_workspace_cleanup_cron,
|
||||
"jenkins_workspace_cleanup_dry_run": settings.jenkins_workspace_cleanup_dry_run,
|
||||
"jenkins_workspace_cleanup_max_deletions_per_run": settings.jenkins_workspace_cleanup_max_deletions_per_run,
|
||||
"vault_k8s_auth_cron": settings.vault_k8s_auth_cron,
|
||||
"vault_oidc_cron": settings.vault_oidc_cron,
|
||||
"comms_guest_name_cron": settings.comms_guest_name_cron,
|
||||
@ -405,591 +230,26 @@ def _shutdown() -> None:
|
||||
|
||||
@app.get("/health")
|
||||
def health() -> dict[str, Any]:
|
||||
"""Return a minimal liveness response for probes and operators."""
|
||||
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
@app.get(settings.metrics_path)
|
||||
def metrics() -> Response:
|
||||
"""Expose Prometheus metrics generated by Ariadne runtime tasks."""
|
||||
|
||||
payload = generate_latest()
|
||||
return Response(payload, media_type=CONTENT_TYPE_LATEST)
|
||||
|
||||
|
||||
@app.get("/api/admin/access/requests")
|
||||
def list_access_requests(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
logger.info(
|
||||
"list access requests",
|
||||
extra={"event": "access_requests_list", "actor": ctx.username or ""},
|
||||
)
|
||||
try:
|
||||
rows = storage.list_pending_requests()
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load requests")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
created_at = row.get("created_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("request_code"),
|
||||
"username": row.get("username"),
|
||||
"email": row.get("contact_email") or "",
|
||||
"first_name": row.get("first_name") or "",
|
||||
"last_name": row.get("last_name") or "",
|
||||
"request_code": row.get("request_code"),
|
||||
"created_at": created_at.isoformat() if isinstance(created_at, datetime) else "",
|
||||
"note": row.get("note") or "",
|
||||
}
|
||||
)
|
||||
return JSONResponse({"requests": output})
|
||||
|
||||
|
||||
@app.get("/api/admin/access/flags")
|
||||
def list_access_flags(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
flags = settings.allowed_flag_groups
|
||||
if keycloak_admin.ready():
|
||||
try:
|
||||
flags = keycloak_admin.list_group_names(exclude={"admin"})
|
||||
except Exception:
|
||||
flags = settings.allowed_flag_groups
|
||||
return JSONResponse({"flags": flags})
|
||||
|
||||
|
||||
@app.get("/api/admin/audit/events")
|
||||
def list_audit_events(
|
||||
limit: int = 200,
|
||||
event_type: str | None = None,
|
||||
ctx: AuthContext = Depends(_require_auth),
|
||||
) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
try:
|
||||
rows = storage.list_events(limit=limit, event_type=event_type)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load audit events")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
created_at = row.get("created_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("id"),
|
||||
"event_type": row.get("event_type"),
|
||||
"detail": _parse_event_detail(row.get("detail")),
|
||||
"created_at": created_at.isoformat() if isinstance(created_at, datetime) else "",
|
||||
}
|
||||
)
|
||||
return JSONResponse({"events": output})
|
||||
|
||||
|
||||
@app.get("/api/admin/audit/task-runs")
|
||||
def list_audit_task_runs(
|
||||
limit: int = 200,
|
||||
request_code: str | None = None,
|
||||
task: str | None = None,
|
||||
ctx: AuthContext = Depends(_require_auth),
|
||||
) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
try:
|
||||
rows = storage.list_task_runs(limit=limit, request_code=request_code, task=task)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load task runs")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
started_at = row.get("started_at")
|
||||
finished_at = row.get("finished_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("id"),
|
||||
"request_code": row.get("request_code") or "",
|
||||
"task": row.get("task") or "",
|
||||
"status": row.get("status") or "",
|
||||
"detail": _parse_event_detail(row.get("detail")),
|
||||
"started_at": started_at.isoformat() if isinstance(started_at, datetime) else "",
|
||||
"finished_at": finished_at.isoformat() if isinstance(finished_at, datetime) else "",
|
||||
"duration_ms": row.get("duration_ms"),
|
||||
}
|
||||
)
|
||||
return JSONResponse({"task_runs": output})
|
||||
|
||||
|
||||
@app.get("/api/admin/cluster/state")
|
||||
def get_cluster_state(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
snapshot = storage.latest_cluster_state()
|
||||
if not snapshot:
|
||||
raise HTTPException(status_code=404, detail="cluster state unavailable")
|
||||
return JSONResponse(snapshot)
|
||||
|
||||
|
||||
@app.get("/api/internal/cluster/state")
|
||||
def get_cluster_state_internal() -> JSONResponse:
|
||||
snapshot = storage.latest_cluster_state()
|
||||
if not snapshot:
|
||||
raise HTTPException(status_code=404, detail="cluster state unavailable")
|
||||
return JSONResponse(snapshot)
|
||||
|
||||
|
||||
@app.post("/api/admin/access/requests/{username}/approve")
|
||||
async def approve_access_request(
|
||||
username: str,
|
||||
request: Request,
|
||||
ctx: AuthContext = Depends(_require_auth),
|
||||
) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
with task_context("admin.access.approve"):
|
||||
payload = await _read_json_payload(request)
|
||||
allowed_flags = _allowed_flag_groups()
|
||||
flags = [flag for flag in _flags_from_payload(payload) if flag in allowed_flags]
|
||||
note = _note_from_payload(payload)
|
||||
|
||||
decided_by = ctx.username or ""
|
||||
try:
|
||||
row = portal_db.fetchone(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET status = 'approved',
|
||||
decided_at = NOW(),
|
||||
decided_by = %s,
|
||||
approval_flags = %s,
|
||||
approval_note = %s
|
||||
WHERE username = %s
|
||||
AND status = 'pending'
|
||||
AND email_verified_at IS NOT NULL
|
||||
RETURNING request_code
|
||||
""",
|
||||
(decided_by or None, flags or None, note, username),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to approve request")
|
||||
|
||||
if not row:
|
||||
logger.info(
|
||||
"access request approval ignored",
|
||||
extra={"event": "access_request_approve", "actor": decided_by, "username": username, "status": "skipped"},
|
||||
)
|
||||
_record_event(
|
||||
"access_request_approve",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"status": "skipped",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": ""})
|
||||
|
||||
request_code = row.get("request_code") or ""
|
||||
if request_code:
|
||||
threading.Thread(
|
||||
target=provisioning.provision_access_request,
|
||||
args=(request_code,),
|
||||
daemon=True,
|
||||
).start()
|
||||
logger.info(
|
||||
"access request approved",
|
||||
extra={
|
||||
"event": "access_request_approve",
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": request_code,
|
||||
},
|
||||
)
|
||||
_record_event(
|
||||
"access_request_approve",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": request_code,
|
||||
"status": "ok",
|
||||
"flags": flags,
|
||||
"note": note or "",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": request_code})
|
||||
|
||||
|
||||
@app.post("/api/admin/access/requests/{username}/deny")
|
||||
async def deny_access_request(
|
||||
username: str,
|
||||
request: Request,
|
||||
ctx: AuthContext = Depends(_require_auth),
|
||||
) -> JSONResponse:
|
||||
_require_admin(ctx)
|
||||
with task_context("admin.access.deny"):
|
||||
payload = await _read_json_payload(request)
|
||||
note = _note_from_payload(payload)
|
||||
decided_by = ctx.username or ""
|
||||
|
||||
try:
|
||||
row = portal_db.fetchone(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET status = 'denied',
|
||||
decided_at = NOW(),
|
||||
decided_by = %s,
|
||||
denial_note = %s
|
||||
WHERE username = %s AND status = 'pending'
|
||||
RETURNING request_code
|
||||
""",
|
||||
(decided_by or None, note, username),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to deny request")
|
||||
|
||||
if not row:
|
||||
logger.info(
|
||||
"access request denial ignored",
|
||||
extra={"event": "access_request_deny", "actor": decided_by, "username": username, "status": "skipped"},
|
||||
)
|
||||
_record_event(
|
||||
"access_request_deny",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"status": "skipped",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": ""})
|
||||
logger.info(
|
||||
"access request denied",
|
||||
extra={
|
||||
"event": "access_request_deny",
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": row.get("request_code") or "",
|
||||
},
|
||||
)
|
||||
_record_event(
|
||||
"access_request_deny",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": row.get("request_code") or "",
|
||||
"status": "ok",
|
||||
"note": note or "",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": row.get("request_code")})
|
||||
|
||||
|
||||
@app.post("/api/access/requests/{request_code}/retry")
|
||||
def retry_access_request(request_code: str) -> JSONResponse:
|
||||
code = (request_code or "").strip()
|
||||
if not code:
|
||||
raise HTTPException(status_code=400, detail="request_code is required")
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
try:
|
||||
row = portal_db.fetchone(
|
||||
"SELECT status FROM access_requests WHERE request_code = %s",
|
||||
(code,),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load request")
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="not found")
|
||||
|
||||
status = (row.get("status") or "").strip()
|
||||
if status not in {"accounts_building", "approved"}:
|
||||
raise HTTPException(status_code=409, detail="request not retryable")
|
||||
|
||||
try:
|
||||
portal_db.execute(
|
||||
"UPDATE access_requests SET provision_attempted_at = NULL WHERE request_code = %s",
|
||||
(code,),
|
||||
)
|
||||
portal_db.execute(
|
||||
"""
|
||||
UPDATE access_request_tasks
|
||||
SET status = 'pending',
|
||||
detail = 'retry requested',
|
||||
updated_at = NOW()
|
||||
WHERE request_code = %s AND status = 'error'
|
||||
""",
|
||||
(code,),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to update retry state")
|
||||
|
||||
threading.Thread(
|
||||
target=provisioning.provision_access_request,
|
||||
args=(code,),
|
||||
daemon=True,
|
||||
).start()
|
||||
_record_event(
|
||||
"access_request_retry",
|
||||
{
|
||||
"request_code": code,
|
||||
"status": "ok",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": code})
|
||||
|
||||
|
||||
@app.post("/api/account/mailu/rotate")
|
||||
def rotate_mailu_password(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
with task_context("account.mailu_rotate"):
|
||||
started = datetime.now(timezone.utc)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
sync_enabled = mailu.ready()
|
||||
sync_ok = False
|
||||
sync_error = ""
|
||||
nextcloud_sync: dict[str, Any] = {"status": "skipped"}
|
||||
|
||||
logger.info(
|
||||
"mailu password rotate requested",
|
||||
extra={"event": "mailu_rotate", "username": username},
|
||||
)
|
||||
try:
|
||||
password = random_password()
|
||||
keycloak_admin.set_user_attribute(username, "mailu_app_password", password)
|
||||
|
||||
if sync_enabled:
|
||||
try:
|
||||
mailu.sync("ariadne_mailu_rotate")
|
||||
sync_ok = True
|
||||
except Exception as exc:
|
||||
sync_error = safe_error_detail(exc, "sync request failed")
|
||||
|
||||
try:
|
||||
nextcloud_sync = nextcloud.sync_mail(username, wait=True)
|
||||
except Exception as exc:
|
||||
nextcloud_sync = {"status": "error", "detail": safe_error_detail(exc, "failed to sync nextcloud")}
|
||||
|
||||
logger.info(
|
||||
"mailu password rotate completed",
|
||||
extra={
|
||||
"event": "mailu_rotate",
|
||||
"username": username,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"nextcloud_status": nextcloud_sync.get("status") if isinstance(nextcloud_sync, dict) else "",
|
||||
},
|
||||
)
|
||||
return JSONResponse(
|
||||
{
|
||||
"password": password,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"sync_error": sync_error,
|
||||
"nextcloud_sync": nextcloud_sync,
|
||||
}
|
||||
)
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, "mailu rotate failed")
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - started).total_seconds()
|
||||
record_task_run("mailu_rotate", status, duration_sec)
|
||||
try:
|
||||
storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=None,
|
||||
task="mailu_rotate",
|
||||
status=status,
|
||||
detail=error_detail or None,
|
||||
started_at=started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
_record_event(
|
||||
"mailu_rotate",
|
||||
{
|
||||
"username": username,
|
||||
"status": status,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"nextcloud_status": nextcloud_sync.get("status") if isinstance(nextcloud_sync, dict) else "",
|
||||
"error": error_detail,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.post("/api/account/wger/reset")
|
||||
def reset_wger_password(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.wger_reset"):
|
||||
mailu_email = _resolve_mailu_email(username)
|
||||
password = random_password()
|
||||
request = PasswordResetRequest(
|
||||
task_name="wger_reset",
|
||||
service_label="wger",
|
||||
username=username,
|
||||
mailu_email=mailu_email,
|
||||
password=password,
|
||||
sync_fn=lambda: wger.sync_user(username, mailu_email, password, wait=True),
|
||||
password_attr="wger_password",
|
||||
updated_attr="wger_password_updated_at",
|
||||
error_hint="wger sync failed",
|
||||
)
|
||||
return _run_password_reset(request)
|
||||
|
||||
|
||||
@app.post("/api/account/firefly/reset")
|
||||
def reset_firefly_password(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.firefly_reset"):
|
||||
mailu_email = _resolve_mailu_email(username)
|
||||
password = random_password(24)
|
||||
request = PasswordResetRequest(
|
||||
task_name="firefly_reset",
|
||||
service_label="firefly",
|
||||
username=username,
|
||||
mailu_email=mailu_email,
|
||||
password=password,
|
||||
sync_fn=lambda: firefly.sync_user(mailu_email, password, wait=True),
|
||||
password_attr="firefly_password",
|
||||
updated_attr="firefly_password_updated_at",
|
||||
error_hint="firefly sync failed",
|
||||
)
|
||||
return _run_password_reset(request)
|
||||
|
||||
|
||||
@app.post("/api/account/firefly/rotation/check")
|
||||
def firefly_rotation_check(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.firefly_rotation_check"):
|
||||
result = firefly.check_rotation_for_user(username)
|
||||
if result.get("status") == "error":
|
||||
raise HTTPException(status_code=502, detail=result.get("detail") or "firefly rotation check failed")
|
||||
return JSONResponse(result)
|
||||
|
||||
|
||||
@app.post("/api/account/wger/rotation/check")
|
||||
def wger_rotation_check(ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.wger_rotation_check"):
|
||||
result = wger.check_rotation_for_user(username)
|
||||
if result.get("status") == "error":
|
||||
raise HTTPException(status_code=502, detail=result.get("detail") or "wger rotation check failed")
|
||||
return JSONResponse(result)
|
||||
|
||||
|
||||
@app.post("/api/account/nextcloud/mail/sync")
|
||||
async def nextcloud_mail_sync(request: Request, ctx: AuthContext = Depends(_require_auth)) -> JSONResponse:
|
||||
_require_account_access(ctx)
|
||||
if not keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.nextcloud_sync"):
|
||||
try:
|
||||
payload = await request.json()
|
||||
except Exception:
|
||||
payload = {}
|
||||
wait = bool(payload.get("wait", True)) if isinstance(payload, dict) else True
|
||||
|
||||
started = datetime.now(timezone.utc)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
logger.info(
|
||||
"nextcloud mail sync requested",
|
||||
extra={"event": "nextcloud_sync", "username": username, "wait": wait},
|
||||
)
|
||||
try:
|
||||
result = nextcloud.sync_mail(username, wait=wait)
|
||||
logger.info(
|
||||
"nextcloud mail sync completed",
|
||||
extra={
|
||||
"event": "nextcloud_sync",
|
||||
"username": username,
|
||||
"status": result.get("status") if isinstance(result, dict) else "",
|
||||
},
|
||||
)
|
||||
return JSONResponse(result)
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, "failed to sync nextcloud mail")
|
||||
logger.info(
|
||||
"nextcloud mail sync failed",
|
||||
extra={"event": "nextcloud_sync", "username": username, "error": error_detail},
|
||||
)
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - started).total_seconds()
|
||||
record_task_run("nextcloud_sync", status, duration_sec)
|
||||
try:
|
||||
storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=None,
|
||||
task="nextcloud_sync",
|
||||
status=status,
|
||||
detail=error_detail or None,
|
||||
started_at=started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
_record_event(
|
||||
"nextcloud_sync",
|
||||
{
|
||||
"username": username,
|
||||
"status": status,
|
||||
"wait": wait,
|
||||
"error": error_detail,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@app.post("/events")
|
||||
def mailu_event_listener(payload: dict[str, Any] | None = Body(default=None)) -> Response:
|
||||
"""Accept Mailu webhook events and dispatch mapped account actions."""
|
||||
|
||||
status_code, response = mailu_events.handle_event(payload)
|
||||
return JSONResponse(response, status_code=status_code)
|
||||
|
||||
|
||||
_register_admin_routes(app, _require_auth, _app_module)
|
||||
_register_account_routes(app, _require_auth, _app_module)
|
||||
|
||||
356
ariadne/app_account_routes.py
Normal file
356
ariadne/app_account_routes.py
Normal file
@ -0,0 +1,356 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Callable
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from .auth.keycloak import AuthContext
|
||||
from .db.storage import TaskRunRecord
|
||||
from .utils.errors import safe_error_detail
|
||||
from .utils.logging import task_context
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class AccountTaskContext:
|
||||
task_name: str
|
||||
username: str
|
||||
started: datetime
|
||||
extra: dict[str, Any] | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PasswordResetRequest:
|
||||
task_name: str
|
||||
service_label: str
|
||||
username: str
|
||||
mailu_email: str
|
||||
password: str
|
||||
sync_fn: Callable[[], dict[str, Any]]
|
||||
password_attr: str
|
||||
updated_attr: str
|
||||
error_hint: str
|
||||
|
||||
|
||||
def _resolve_mailu_email(module: Any, username: str) -> str:
|
||||
mailu_email = f"{username}@{module.settings.mailu_domain}"
|
||||
try:
|
||||
user = module.keycloak_admin.find_user(username) or {}
|
||||
attrs = user.get("attributes") if isinstance(user, dict) else None
|
||||
if isinstance(attrs, dict):
|
||||
raw_mailu = attrs.get("mailu_email")
|
||||
if isinstance(raw_mailu, list) and raw_mailu:
|
||||
return str(raw_mailu[0])
|
||||
if isinstance(raw_mailu, str) and raw_mailu:
|
||||
return raw_mailu
|
||||
except Exception:
|
||||
return mailu_email
|
||||
return mailu_email
|
||||
|
||||
|
||||
def _record_account_task(module: Any, ctx: AccountTaskContext, status: str, error_detail: str) -> None:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - ctx.started).total_seconds()
|
||||
module.record_task_run(ctx.task_name, status, duration_sec)
|
||||
try:
|
||||
module.storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=None,
|
||||
task=ctx.task_name,
|
||||
status=status,
|
||||
detail=error_detail or None,
|
||||
started_at=ctx.started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
detail = {"username": ctx.username, "status": status, "error": error_detail}
|
||||
if ctx.extra:
|
||||
detail.update(ctx.extra)
|
||||
module._record_event(ctx.task_name, detail)
|
||||
|
||||
|
||||
def _run_password_reset(module: Any, request: PasswordResetRequest) -> JSONResponse:
|
||||
started = datetime.now(timezone.utc)
|
||||
task_ctx = AccountTaskContext(
|
||||
task_name=request.task_name,
|
||||
username=request.username,
|
||||
started=started,
|
||||
extra={"mailu_email": request.mailu_email},
|
||||
)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
module.logger.info(
|
||||
f"{request.service_label} password reset requested",
|
||||
extra={"event": request.task_name, "username": request.username},
|
||||
)
|
||||
try:
|
||||
result = request.sync_fn()
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
raise RuntimeError(f"{request.service_label} sync {status_val}")
|
||||
|
||||
module.keycloak_admin.set_user_attribute(request.username, request.password_attr, request.password)
|
||||
module.keycloak_admin.set_user_attribute(
|
||||
request.username,
|
||||
request.updated_attr,
|
||||
datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
)
|
||||
|
||||
module.logger.info(
|
||||
f"{request.service_label} password reset completed",
|
||||
extra={"event": request.task_name, "username": request.username},
|
||||
)
|
||||
return JSONResponse({"status": "ok", "password": request.password})
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, request.error_hint)
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
_record_account_task(module, task_ctx, status, error_detail)
|
||||
|
||||
|
||||
def _register_account_routes(app: FastAPI, require_auth: Callable, deps: Callable[[], Any]) -> None: # noqa: PLR0915
|
||||
@app.post("/api/account/mailu/rotate")
|
||||
def rotate_mailu_password(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Rotate the caller's Mailu app password and trigger dependent syncs."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
with task_context("account.mailu_rotate"):
|
||||
started = datetime.now(timezone.utc)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
sync_enabled = module.mailu.ready()
|
||||
sync_ok = False
|
||||
sync_error = ""
|
||||
nextcloud_sync: dict[str, Any] = {"status": "skipped"}
|
||||
|
||||
module.logger.info("mailu password rotate requested", extra={"event": "mailu_rotate", "username": username})
|
||||
try:
|
||||
password = module.random_password()
|
||||
module.keycloak_admin.set_user_attribute(username, "mailu_app_password", password)
|
||||
|
||||
if sync_enabled:
|
||||
try:
|
||||
module.mailu.sync("ariadne_mailu_rotate")
|
||||
sync_ok = True
|
||||
except Exception as exc:
|
||||
sync_error = safe_error_detail(exc, "sync request failed")
|
||||
|
||||
try:
|
||||
nextcloud_sync = module.nextcloud.sync_mail(username, wait=True)
|
||||
except Exception as exc:
|
||||
nextcloud_sync = {"status": "error", "detail": safe_error_detail(exc, "failed to sync nextcloud")}
|
||||
|
||||
module.logger.info(
|
||||
"mailu password rotate completed",
|
||||
extra={
|
||||
"event": "mailu_rotate",
|
||||
"username": username,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"nextcloud_status": nextcloud_sync.get("status") if isinstance(nextcloud_sync, dict) else "",
|
||||
},
|
||||
)
|
||||
return JSONResponse(
|
||||
{
|
||||
"password": password,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"sync_error": sync_error,
|
||||
"nextcloud_sync": nextcloud_sync,
|
||||
}
|
||||
)
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, "mailu rotate failed")
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
task_ctx = AccountTaskContext("mailu_rotate", username, started)
|
||||
_record_account_task(module, task_ctx, status, error_detail)
|
||||
module._record_event(
|
||||
"mailu_rotate",
|
||||
{
|
||||
"username": username,
|
||||
"status": status,
|
||||
"sync_enabled": sync_enabled,
|
||||
"sync_ok": sync_ok,
|
||||
"nextcloud_status": nextcloud_sync.get("status") if isinstance(nextcloud_sync, dict) else "",
|
||||
"error": error_detail,
|
||||
},
|
||||
)
|
||||
|
||||
@app.post("/api/account/wger/reset")
|
||||
def reset_wger_password(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Reset the caller's Wger password and synchronize the service account."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.wger_reset"):
|
||||
mailu_email = _resolve_mailu_email(module, username)
|
||||
password = module.random_password()
|
||||
request = PasswordResetRequest(
|
||||
task_name="wger_reset",
|
||||
service_label="wger",
|
||||
username=username,
|
||||
mailu_email=mailu_email,
|
||||
password=password,
|
||||
sync_fn=lambda: module.wger.sync_user(username, mailu_email, password, wait=True),
|
||||
password_attr="wger_password",
|
||||
updated_attr="wger_password_updated_at",
|
||||
error_hint="wger sync failed",
|
||||
)
|
||||
return _run_password_reset(module, request)
|
||||
|
||||
@app.post("/api/account/firefly/reset")
|
||||
def reset_firefly_password(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Reset the caller's Firefly password and synchronize the service account."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.firefly_reset"):
|
||||
mailu_email = _resolve_mailu_email(module, username)
|
||||
password = module.random_password(24)
|
||||
request = PasswordResetRequest(
|
||||
task_name="firefly_reset",
|
||||
service_label="firefly",
|
||||
username=username,
|
||||
mailu_email=mailu_email,
|
||||
password=password,
|
||||
sync_fn=lambda: module.firefly.sync_user(mailu_email, password, wait=True),
|
||||
password_attr="firefly_password",
|
||||
updated_attr="firefly_password_updated_at",
|
||||
error_hint="firefly sync failed",
|
||||
)
|
||||
return _run_password_reset(module, request)
|
||||
|
||||
@app.post("/api/account/firefly/rotation/check")
|
||||
def firefly_rotation_check(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Check whether the caller's Firefly password rotation is healthy."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.firefly_rotation_check"):
|
||||
result = module.firefly.check_rotation_for_user(username)
|
||||
if result.get("status") == "error":
|
||||
raise HTTPException(status_code=502, detail=result.get("detail") or "firefly rotation check failed")
|
||||
return JSONResponse(result)
|
||||
|
||||
@app.post("/api/account/wger/rotation/check")
|
||||
def wger_rotation_check(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Check whether the caller's Wger password rotation is healthy."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.wger_rotation_check"):
|
||||
result = module.wger.check_rotation_for_user(username)
|
||||
if result.get("status") == "error":
|
||||
raise HTTPException(status_code=502, detail=result.get("detail") or "wger rotation check failed")
|
||||
return JSONResponse(result)
|
||||
|
||||
@app.post("/api/account/nextcloud/mail/sync")
|
||||
async def nextcloud_mail_sync(request: Request, ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Synchronize the caller's Mailu address into Nextcloud mail settings."""
|
||||
|
||||
module = deps()
|
||||
module._require_account_access(ctx)
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
username = ctx.username or ""
|
||||
if not username:
|
||||
raise HTTPException(status_code=400, detail="missing username")
|
||||
|
||||
with task_context("account.nextcloud_sync"):
|
||||
try:
|
||||
payload = await request.json()
|
||||
except Exception:
|
||||
payload = {}
|
||||
wait = bool(payload.get("wait", True)) if isinstance(payload, dict) else True
|
||||
|
||||
started = datetime.now(timezone.utc)
|
||||
status = "ok"
|
||||
error_detail = ""
|
||||
module.logger.info("nextcloud mail sync requested", extra={"event": "nextcloud_sync", "username": username, "wait": wait})
|
||||
try:
|
||||
result = module.nextcloud.sync_mail(username, wait=wait)
|
||||
module.logger.info(
|
||||
"nextcloud mail sync completed",
|
||||
extra={
|
||||
"event": "nextcloud_sync",
|
||||
"username": username,
|
||||
"status": result.get("status") if isinstance(result, dict) else "",
|
||||
},
|
||||
)
|
||||
return JSONResponse(result)
|
||||
except HTTPException as exc:
|
||||
status = "error"
|
||||
error_detail = str(exc.detail)
|
||||
raise
|
||||
except Exception as exc:
|
||||
status = "error"
|
||||
error_detail = safe_error_detail(exc, "failed to sync nextcloud mail")
|
||||
module.logger.info(
|
||||
"nextcloud mail sync failed",
|
||||
extra={"event": "nextcloud_sync", "username": username, "error": error_detail},
|
||||
)
|
||||
raise HTTPException(status_code=502, detail=error_detail)
|
||||
finally:
|
||||
task_ctx = AccountTaskContext("nextcloud_sync", username, started)
|
||||
_record_account_task(module, task_ctx, status, error_detail)
|
||||
module._record_event(
|
||||
"nextcloud_sync",
|
||||
{
|
||||
"username": username,
|
||||
"status": status,
|
||||
"wait": wait,
|
||||
"error": error_detail,
|
||||
},
|
||||
)
|
||||
346
ariadne/app_admin_routes.py
Normal file
346
ariadne/app_admin_routes.py
Normal file
@ -0,0 +1,346 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import threading
|
||||
from typing import Any, Callable
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
|
||||
from .auth.keycloak import AuthContext
|
||||
from .utils.logging import task_context
|
||||
|
||||
|
||||
def _register_admin_routes(app: FastAPI, require_auth: Callable, deps: Callable[[], Any]) -> None: # noqa: PLR0915
|
||||
@app.get("/api/admin/access/requests")
|
||||
def list_access_requests(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Return pending access requests for authenticated administrators."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
module.logger.info(
|
||||
"list access requests",
|
||||
extra={"event": "access_requests_list", "actor": ctx.username or ""},
|
||||
)
|
||||
try:
|
||||
rows = module.storage.list_pending_requests()
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load requests")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
created_at = row.get("created_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("request_code"),
|
||||
"username": row.get("username"),
|
||||
"email": row.get("contact_email") or "",
|
||||
"first_name": row.get("first_name") or "",
|
||||
"last_name": row.get("last_name") or "",
|
||||
"request_code": row.get("request_code"),
|
||||
"created_at": created_at.isoformat() if isinstance(created_at, datetime) else "",
|
||||
"note": row.get("note") or "",
|
||||
}
|
||||
)
|
||||
return JSONResponse({"requests": output})
|
||||
|
||||
@app.get("/api/admin/access/flags")
|
||||
def list_access_flags(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Return Keycloak groups that can be applied as access-request flags."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
flags = module.settings.allowed_flag_groups
|
||||
if module.keycloak_admin.ready():
|
||||
try:
|
||||
flags = module.keycloak_admin.list_group_names(exclude={"admin"})
|
||||
except Exception:
|
||||
flags = module.settings.allowed_flag_groups
|
||||
return JSONResponse({"flags": flags})
|
||||
|
||||
@app.get("/api/admin/audit/events")
|
||||
def list_audit_events(
|
||||
limit: int = 200,
|
||||
event_type: str | None = None,
|
||||
ctx: AuthContext = Depends(require_auth),
|
||||
) -> JSONResponse:
|
||||
"""Return recent audit events with optional type filtering."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
try:
|
||||
rows = module.storage.list_events(limit=limit, event_type=event_type)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load audit events")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
created_at = row.get("created_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("id"),
|
||||
"event_type": row.get("event_type"),
|
||||
"detail": module._parse_event_detail(row.get("detail")),
|
||||
"created_at": created_at.isoformat() if isinstance(created_at, datetime) else "",
|
||||
}
|
||||
)
|
||||
return JSONResponse({"events": output})
|
||||
|
||||
@app.get("/api/admin/audit/task-runs")
|
||||
def list_audit_task_runs(
|
||||
limit: int = 200,
|
||||
request_code: str | None = None,
|
||||
task: str | None = None,
|
||||
ctx: AuthContext = Depends(require_auth),
|
||||
) -> JSONResponse:
|
||||
"""Return recorded background task runs for admin audit views."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
try:
|
||||
rows = module.storage.list_task_runs(limit=limit, request_code=request_code, task=task)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load task runs")
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for row in rows:
|
||||
started_at = row.get("started_at")
|
||||
finished_at = row.get("finished_at")
|
||||
output.append(
|
||||
{
|
||||
"id": row.get("id"),
|
||||
"request_code": row.get("request_code") or "",
|
||||
"task": row.get("task") or "",
|
||||
"status": row.get("status") or "",
|
||||
"detail": module._parse_event_detail(row.get("detail")),
|
||||
"started_at": started_at.isoformat() if isinstance(started_at, datetime) else "",
|
||||
"finished_at": finished_at.isoformat() if isinstance(finished_at, datetime) else "",
|
||||
"duration_ms": row.get("duration_ms"),
|
||||
}
|
||||
)
|
||||
return JSONResponse({"task_runs": output})
|
||||
|
||||
@app.get("/api/admin/cluster/state")
|
||||
def get_cluster_state(ctx: AuthContext = Depends(require_auth)) -> JSONResponse:
|
||||
"""Return the latest cluster-state snapshot to authenticated administrators."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
snapshot = module.storage.latest_cluster_state()
|
||||
if not snapshot:
|
||||
raise HTTPException(status_code=404, detail="cluster state unavailable")
|
||||
return JSONResponse(snapshot)
|
||||
|
||||
@app.get("/api/internal/cluster/state")
|
||||
def get_cluster_state_internal() -> JSONResponse:
|
||||
"""Return the latest cluster-state snapshot for trusted internal callers."""
|
||||
|
||||
module = deps()
|
||||
snapshot = module.storage.latest_cluster_state()
|
||||
if not snapshot:
|
||||
raise HTTPException(status_code=404, detail="cluster state unavailable")
|
||||
return JSONResponse(snapshot)
|
||||
|
||||
@app.post("/api/admin/access/requests/{username}/approve")
|
||||
async def approve_access_request(
|
||||
username: str,
|
||||
request: Request,
|
||||
ctx: AuthContext = Depends(require_auth),
|
||||
) -> JSONResponse:
|
||||
"""Approve a verified access request and start account provisioning."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
with task_context("admin.access.approve"):
|
||||
payload = await module._read_json_payload(request)
|
||||
allowed_flags = module._allowed_flag_groups()
|
||||
flags = [flag for flag in module._flags_from_payload(payload) if flag in allowed_flags]
|
||||
note = module._note_from_payload(payload)
|
||||
|
||||
decided_by = ctx.username or ""
|
||||
try:
|
||||
row = module.portal_db.fetchone(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET status = 'approved',
|
||||
decided_at = NOW(),
|
||||
decided_by = %s,
|
||||
approval_flags = %s,
|
||||
approval_note = %s
|
||||
WHERE username = %s
|
||||
AND status = 'pending'
|
||||
AND email_verified_at IS NOT NULL
|
||||
RETURNING request_code
|
||||
""",
|
||||
(decided_by or None, flags or None, note, username),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to approve request")
|
||||
|
||||
if not row:
|
||||
module.logger.info(
|
||||
"access request approval ignored",
|
||||
extra={"event": "access_request_approve", "actor": decided_by, "username": username, "status": "skipped"},
|
||||
)
|
||||
module._record_event(
|
||||
"access_request_approve",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"status": "skipped",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": ""})
|
||||
|
||||
request_code = row.get("request_code") or ""
|
||||
if request_code:
|
||||
threading.Thread(
|
||||
target=module.provisioning.provision_access_request,
|
||||
args=(request_code,),
|
||||
daemon=True,
|
||||
).start()
|
||||
module.logger.info(
|
||||
"access request approved",
|
||||
extra={
|
||||
"event": "access_request_approve",
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": request_code,
|
||||
},
|
||||
)
|
||||
module._record_event(
|
||||
"access_request_approve",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": request_code,
|
||||
"status": "ok",
|
||||
"flags": flags,
|
||||
"note": note or "",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": request_code})
|
||||
|
||||
@app.post("/api/admin/access/requests/{username}/deny")
|
||||
async def deny_access_request(
|
||||
username: str,
|
||||
request: Request,
|
||||
ctx: AuthContext = Depends(require_auth),
|
||||
) -> JSONResponse:
|
||||
"""Deny a pending access request and record the administrator decision."""
|
||||
|
||||
module = deps()
|
||||
module._require_admin(ctx)
|
||||
with task_context("admin.access.deny"):
|
||||
payload = await module._read_json_payload(request)
|
||||
note = module._note_from_payload(payload)
|
||||
decided_by = ctx.username or ""
|
||||
|
||||
try:
|
||||
row = module.portal_db.fetchone(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET status = 'denied',
|
||||
decided_at = NOW(),
|
||||
decided_by = %s,
|
||||
denial_note = %s
|
||||
WHERE username = %s AND status = 'pending'
|
||||
RETURNING request_code
|
||||
""",
|
||||
(decided_by or None, note, username),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to deny request")
|
||||
|
||||
if not row:
|
||||
module.logger.info(
|
||||
"access request denial ignored",
|
||||
extra={"event": "access_request_deny", "actor": decided_by, "username": username, "status": "skipped"},
|
||||
)
|
||||
module._record_event(
|
||||
"access_request_deny",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"status": "skipped",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": ""})
|
||||
module.logger.info(
|
||||
"access request denied",
|
||||
extra={
|
||||
"event": "access_request_deny",
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": row.get("request_code") or "",
|
||||
},
|
||||
)
|
||||
module._record_event(
|
||||
"access_request_deny",
|
||||
{
|
||||
"actor": decided_by,
|
||||
"username": username,
|
||||
"request_code": row.get("request_code") or "",
|
||||
"status": "ok",
|
||||
"note": note or "",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": row.get("request_code")})
|
||||
|
||||
@app.post("/api/access/requests/{request_code}/retry")
|
||||
def retry_access_request(request_code: str) -> JSONResponse:
|
||||
"""Reset failed provisioning tasks so an approved request can retry."""
|
||||
|
||||
module = deps()
|
||||
code = (request_code or "").strip()
|
||||
if not code:
|
||||
raise HTTPException(status_code=400, detail="request_code is required")
|
||||
if not module.keycloak_admin.ready():
|
||||
raise HTTPException(status_code=503, detail="server not configured")
|
||||
|
||||
try:
|
||||
row = module.portal_db.fetchone(
|
||||
"SELECT status FROM access_requests WHERE request_code = %s",
|
||||
(code,),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to load request")
|
||||
|
||||
if not row:
|
||||
raise HTTPException(status_code=404, detail="not found")
|
||||
|
||||
status = (row.get("status") or "").strip()
|
||||
if status not in {"accounts_building", "approved"}:
|
||||
raise HTTPException(status_code=409, detail="request not retryable")
|
||||
|
||||
try:
|
||||
module.portal_db.execute(
|
||||
"UPDATE access_requests SET provision_attempted_at = NULL WHERE request_code = %s",
|
||||
(code,),
|
||||
)
|
||||
module.portal_db.execute(
|
||||
"""
|
||||
UPDATE access_request_tasks
|
||||
SET status = 'pending',
|
||||
detail = 'retry requested',
|
||||
updated_at = NOW()
|
||||
WHERE request_code = %s AND status = 'error'
|
||||
""",
|
||||
(code,),
|
||||
)
|
||||
except Exception:
|
||||
raise HTTPException(status_code=502, detail="failed to update retry state")
|
||||
|
||||
threading.Thread(
|
||||
target=module.provisioning.provision_access_request,
|
||||
args=(code,),
|
||||
daemon=True,
|
||||
).start()
|
||||
module._record_event(
|
||||
"access_request_retry",
|
||||
{
|
||||
"request_code": code,
|
||||
"status": "ok",
|
||||
},
|
||||
)
|
||||
return JSONResponse({"ok": True, "request_code": code})
|
||||
@ -19,6 +19,8 @@ class AuthContext:
|
||||
|
||||
|
||||
class KeycloakOIDC:
|
||||
"""Validate Keycloak-issued OIDC tokens and return trusted claims."""
|
||||
|
||||
def __init__(self, jwks_url: str, issuer: str, client_id: str) -> None:
|
||||
self._jwks_url = jwks_url
|
||||
self._issuer = issuer
|
||||
@ -55,12 +57,18 @@ class KeycloakOIDC:
|
||||
def _decode_claims(self, token: str, key: dict[str, Any]) -> dict[str, Any]:
|
||||
return jwt.decode(
|
||||
token,
|
||||
key=jwt.algorithms.RSAAlgorithm.from_jwk(key),
|
||||
key=self._key_from_jwk(key),
|
||||
algorithms=["RS256"],
|
||||
options={"verify_aud": False},
|
||||
issuer=self._issuer,
|
||||
)
|
||||
|
||||
def _key_from_jwk(self, key: dict[str, Any]) -> Any:
|
||||
algorithm = getattr(jwt.algorithms, "RSAAlgorithm", None)
|
||||
if algorithm and hasattr(algorithm, "from_jwk"):
|
||||
return algorithm.from_jwk(key)
|
||||
return jwt.PyJWK.from_dict(key).key
|
||||
|
||||
def _validate_audience(self, claims: dict[str, Any]) -> None:
|
||||
azp = claims.get("azp")
|
||||
aud = claims.get("aud")
|
||||
@ -97,6 +105,8 @@ class KeycloakOIDC:
|
||||
|
||||
|
||||
class Authenticator:
|
||||
"""Translate bearer tokens into Ariadne authorization context."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._oidc = KeycloakOIDC(settings.keycloak_jwks_url, settings.keycloak_issuer, settings.keycloak_client_id)
|
||||
|
||||
|
||||
@ -25,6 +25,8 @@ class DatabaseConfig:
|
||||
|
||||
|
||||
class Database:
|
||||
"""Small Postgres wrapper with migration and query helpers."""
|
||||
|
||||
def __init__(self, dsn: str, config: DatabaseConfig | None = None) -> None:
|
||||
if not dsn:
|
||||
raise RuntimeError("database URL is required")
|
||||
@ -92,13 +94,7 @@ class Database:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def migrate(
|
||||
self,
|
||||
lock_id: int,
|
||||
*,
|
||||
include_ariadne_tables: bool = True,
|
||||
include_access_requests: bool = True,
|
||||
) -> None:
|
||||
def migrate(self, lock_id: int, *, include_ariadne_tables: bool = True, include_access_requests: bool = True) -> None:
|
||||
with self.connection() as conn:
|
||||
self._configure_timeouts(conn)
|
||||
if not self._try_advisory_lock(conn, lock_id):
|
||||
|
||||
@ -62,6 +62,8 @@ class ScheduleState:
|
||||
|
||||
|
||||
class Storage:
|
||||
"""Persist Ariadne access requests, task state, and audit data."""
|
||||
|
||||
def __init__(self, db: Database, portal_db: Database | None = None) -> None:
|
||||
self._db = db
|
||||
self._portal_db = portal_db or db
|
||||
@ -262,6 +264,36 @@ class Storage:
|
||||
),
|
||||
)
|
||||
|
||||
def list_schedule_states(self) -> list[ScheduleState]:
|
||||
"""Return persisted scheduler state so metrics survive process restarts."""
|
||||
|
||||
rows = self._db.fetchall(
|
||||
"""
|
||||
SELECT task_name, cron_expr, last_started_at, last_finished_at, last_status,
|
||||
last_error, last_duration_ms, next_run_at
|
||||
FROM ariadne_schedule_state
|
||||
"""
|
||||
)
|
||||
states: list[ScheduleState] = []
|
||||
for row in rows:
|
||||
task_name = row.get("task_name")
|
||||
cron_expr = row.get("cron_expr")
|
||||
if not isinstance(task_name, str) or not isinstance(cron_expr, str):
|
||||
continue
|
||||
states.append(
|
||||
ScheduleState(
|
||||
task_name=task_name,
|
||||
cron_expr=cron_expr,
|
||||
last_started_at=row.get("last_started_at"),
|
||||
last_finished_at=row.get("last_finished_at"),
|
||||
last_status=row.get("last_status"),
|
||||
last_error=row.get("last_error"),
|
||||
last_duration_ms=row.get("last_duration_ms"),
|
||||
next_run_at=row.get("next_run_at"),
|
||||
)
|
||||
)
|
||||
return states
|
||||
|
||||
def record_cluster_state(self, snapshot: dict[str, Any]) -> None:
|
||||
payload = json.dumps(snapshot, ensure_ascii=True)
|
||||
self._db.execute(
|
||||
|
||||
@ -35,6 +35,8 @@ def _k8s_request(method: str, path: str, payload: dict[str, Any] | None = None)
|
||||
|
||||
|
||||
def get_json(path: str) -> dict[str, Any]:
|
||||
"""Fetch a Kubernetes API path and return its JSON object payload."""
|
||||
|
||||
payload = _k8s_request("GET", path)
|
||||
if not isinstance(payload, dict):
|
||||
raise RuntimeError("unexpected kubernetes response")
|
||||
@ -42,6 +44,8 @@ def get_json(path: str) -> dict[str, Any]:
|
||||
|
||||
|
||||
def post_json(path: str, payload: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Post a JSON payload to the Kubernetes API and return the response."""
|
||||
|
||||
data = _k8s_request("POST", path, payload)
|
||||
if not isinstance(data, dict):
|
||||
raise RuntimeError("unexpected kubernetes response")
|
||||
@ -49,6 +53,8 @@ def post_json(path: str, payload: dict[str, Any]) -> dict[str, Any]:
|
||||
|
||||
|
||||
def delete_json(path: str) -> dict[str, Any]:
|
||||
"""Delete a Kubernetes API resource and return the response payload."""
|
||||
|
||||
data = _k8s_request("DELETE", path)
|
||||
if not isinstance(data, dict):
|
||||
raise RuntimeError("unexpected kubernetes response")
|
||||
@ -56,6 +62,8 @@ def delete_json(path: str) -> dict[str, Any]:
|
||||
|
||||
|
||||
def get_secret_value(namespace: str, name: str, key: str) -> str:
|
||||
"""Read and decode one string value from a Kubernetes Secret."""
|
||||
|
||||
data = get_json(f"/api/v1/namespaces/{namespace}/secrets/{name}")
|
||||
blob = data.get("data") if isinstance(data.get("data"), dict) else {}
|
||||
raw = blob.get(key)
|
||||
|
||||
@ -9,10 +9,7 @@ try:
|
||||
from kubernetes import client, config
|
||||
from kubernetes.stream import stream
|
||||
except Exception as exc: # pragma: no cover - import checked at runtime
|
||||
client = None
|
||||
config = None
|
||||
stream = None
|
||||
_IMPORT_ERROR = exc
|
||||
client, config, stream, _IMPORT_ERROR = None, None, None, exc
|
||||
else:
|
||||
_IMPORT_ERROR = None
|
||||
|
||||
@ -65,18 +62,14 @@ def _build_command(command: list[str] | str, env: dict[str, str] | None) -> list
|
||||
|
||||
|
||||
class PodExecutor:
|
||||
"""Run shell commands inside the freshest ready pod matching a selector."""
|
||||
|
||||
def __init__(self, namespace: str, label_selector: str, container: str | None = None) -> None:
|
||||
self._namespace = namespace
|
||||
self._label_selector = label_selector
|
||||
self._container = container
|
||||
|
||||
def exec(
|
||||
self,
|
||||
command: list[str] | str,
|
||||
env: dict[str, str] | None = None,
|
||||
timeout_sec: float | None = None,
|
||||
check: bool = True,
|
||||
) -> ExecResult:
|
||||
def exec(self, command: list[str] | str, env: dict[str, str] | None = None, timeout_sec: float | None = None, check: bool = True) -> ExecResult:
|
||||
pod = select_pod(self._namespace, self._label_selector)
|
||||
cmd = _build_command(command, env)
|
||||
api = _ensure_client()
|
||||
|
||||
@ -47,6 +47,8 @@ def _is_ready(pod: dict[str, Any]) -> bool:
|
||||
|
||||
|
||||
def list_pods(namespace: str, label_selector: str) -> list[dict[str, Any]]:
|
||||
"""List Kubernetes pods for a namespace and label selector."""
|
||||
|
||||
namespace = (namespace or "").strip()
|
||||
if not namespace:
|
||||
raise PodSelectionError("pod namespace missing")
|
||||
@ -58,6 +60,8 @@ def list_pods(namespace: str, label_selector: str) -> list[dict[str, Any]]:
|
||||
|
||||
|
||||
def select_pod(namespace: str, label_selector: str) -> PodRef:
|
||||
"""Select the newest ready pod matching a namespace and label selector."""
|
||||
|
||||
pods = list_pods(namespace, label_selector)
|
||||
candidates: list[tuple[float, PodRef]] = []
|
||||
for pod in pods:
|
||||
|
||||
@ -1,15 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import hashlib
|
||||
import re
|
||||
from datetime import datetime
|
||||
import threading
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from ..db.database import Database
|
||||
from ..db.storage import REQUIRED_TASKS, Storage, TaskRunRecord
|
||||
from ..db.storage import REQUIRED_TASKS, Storage
|
||||
from ..metrics.metrics import record_task_run, set_access_request_counts
|
||||
from ..services.firefly import firefly
|
||||
from ..services.keycloak_admin import keycloak_admin
|
||||
@ -19,87 +15,80 @@ from ..services.nextcloud import nextcloud
|
||||
from ..services.vaultwarden import vaultwarden
|
||||
from ..services.wger import wger
|
||||
from ..settings import settings
|
||||
from ..utils.errors import safe_error_detail
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.passwords import random_password
|
||||
|
||||
|
||||
MAILU_EMAIL_ATTR = "mailu_email"
|
||||
MAILU_APP_PASSWORD_ATTR = "mailu_app_password"
|
||||
MAILU_ENABLED_ATTR = "mailu_enabled"
|
||||
WGER_PASSWORD_ATTR = "wger_password"
|
||||
WGER_PASSWORD_UPDATED_ATTR = "wger_password_updated_at"
|
||||
FIREFLY_PASSWORD_ATTR = "firefly_password"
|
||||
FIREFLY_PASSWORD_UPDATED_ATTR = "firefly_password_updated_at"
|
||||
VAULTWARDEN_GRANDFATHERED_FLAG = "vaultwarden_grandfathered"
|
||||
_RETRYABLE_HTTP_CODES = {429, 500, 502, 503, 504}
|
||||
_RETRYABLE_TOKENS = (
|
||||
"timeout",
|
||||
"temporar",
|
||||
"rate limited",
|
||||
"mailbox not ready",
|
||||
"connection refused",
|
||||
"connection reset",
|
||||
"network is unreachable",
|
||||
"dns",
|
||||
"name resolution",
|
||||
"service unavailable",
|
||||
"bad gateway",
|
||||
"gateway timeout",
|
||||
from .provisioning_accounts import _ProvisioningAccountsMixin
|
||||
from .provisioning_protocol import (
|
||||
FIREFLY_PASSWORD_ATTR,
|
||||
FIREFLY_PASSWORD_UPDATED_ATTR,
|
||||
MAILU_APP_PASSWORD_ATTR,
|
||||
MAILU_EMAIL_ATTR,
|
||||
MAILU_ENABLED_ATTR,
|
||||
VAULTWARDEN_GRANDFATHERED_FLAG,
|
||||
WGER_PASSWORD_ATTR,
|
||||
WGER_PASSWORD_UPDATED_ATTR,
|
||||
ProvisionOutcome,
|
||||
RequestContext,
|
||||
_advisory_lock_id,
|
||||
_extract_attr,
|
||||
)
|
||||
from .provisioning_tasks import _ProvisioningTaskMixin
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProvisionOutcome:
|
||||
ok: bool
|
||||
status: str
|
||||
class ProvisioningManager(_ProvisioningTaskMixin, _ProvisioningAccountsMixin):
|
||||
"""Coordinate approved access requests across identity and app services."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class RequestContext:
|
||||
request_code: str
|
||||
username: str
|
||||
first_name: str
|
||||
last_name: str
|
||||
contact_email: str
|
||||
email_verified_at: datetime | None
|
||||
status: str
|
||||
initial_password: str | None
|
||||
revealed_at: datetime | None
|
||||
attempted_at: datetime | None
|
||||
approval_flags: list[str]
|
||||
user_id: str = ""
|
||||
mailu_email: str = ""
|
||||
|
||||
|
||||
def _advisory_lock_id(request_code: str) -> int:
|
||||
digest = hashlib.sha256(request_code.encode("utf-8")).digest()
|
||||
return int.from_bytes(digest[:8], "big", signed=True)
|
||||
|
||||
|
||||
def _extract_attr(attrs: Any, key: str) -> str:
|
||||
if not isinstance(attrs, dict):
|
||||
return ""
|
||||
raw = attrs.get(key)
|
||||
if isinstance(raw, list):
|
||||
for item in raw:
|
||||
if isinstance(item, str) and item.strip():
|
||||
return item.strip()
|
||||
return ""
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return raw.strip()
|
||||
return ""
|
||||
|
||||
|
||||
class ProvisioningManager:
|
||||
def __init__(self, db: Database, storage: Storage) -> None:
|
||||
self._db = db
|
||||
self._storage = storage
|
||||
self._thread: threading.Thread | None = None
|
||||
self._stop_event = threading.Event()
|
||||
|
||||
@property
|
||||
def _settings(self):
|
||||
return settings
|
||||
|
||||
@property
|
||||
def _logger(self):
|
||||
return logger
|
||||
|
||||
@property
|
||||
def _keycloak_admin(self):
|
||||
return keycloak_admin
|
||||
|
||||
@property
|
||||
def _mailu(self):
|
||||
return mailu
|
||||
|
||||
@property
|
||||
def _nextcloud(self):
|
||||
return nextcloud
|
||||
|
||||
@property
|
||||
def _wger(self):
|
||||
return wger
|
||||
|
||||
@property
|
||||
def _firefly(self):
|
||||
return firefly
|
||||
|
||||
@property
|
||||
def _vaultwarden(self):
|
||||
return vaultwarden
|
||||
|
||||
@property
|
||||
def _mailer(self):
|
||||
return mailer
|
||||
|
||||
def _random_password(self, length: int = 32) -> str:
|
||||
return random_password(length)
|
||||
|
||||
def _record_task_run_metric(self, task: str, status: str, duration_sec: float) -> None:
|
||||
record_task_run(task, status, duration_sec)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._thread and self._thread.is_alive():
|
||||
return
|
||||
@ -207,12 +196,7 @@ class ProvisioningManager:
|
||||
extra={"event": "provision_unlock_error", "request_code": request_code},
|
||||
)
|
||||
|
||||
def _provision_locked(
|
||||
self,
|
||||
conn,
|
||||
request_code: str,
|
||||
required_tasks: list[str],
|
||||
) -> ProvisionOutcome:
|
||||
def _provision_locked(self, conn, request_code: str, required_tasks: list[str]) -> ProvisionOutcome:
|
||||
ctx = self._load_request(conn, request_code)
|
||||
if not ctx:
|
||||
return ProvisionOutcome(ok=False, status="unknown")
|
||||
@ -227,12 +211,7 @@ class ProvisioningManager:
|
||||
|
||||
return self._run_task_pipeline(conn, ctx, required_tasks)
|
||||
|
||||
def _run_task_pipeline(
|
||||
self,
|
||||
conn,
|
||||
ctx: RequestContext,
|
||||
required_tasks: list[str],
|
||||
) -> ProvisionOutcome:
|
||||
def _run_task_pipeline(self, conn, ctx: RequestContext, required_tasks: list[str]) -> ProvisionOutcome:
|
||||
if not self._ensure_keycloak_user(conn, ctx):
|
||||
return ProvisionOutcome(ok=False, status="accounts_building")
|
||||
if not self._run_account_tasks(conn, ctx):
|
||||
@ -353,581 +332,19 @@ class ProvisioningManager:
|
||||
pass
|
||||
return ProvisionOutcome(ok=False, status=pending_status)
|
||||
|
||||
def _ensure_task_rows(self, conn, request_code: str, tasks: list[str]) -> None:
|
||||
if not tasks:
|
||||
return
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO access_request_tasks (request_code, task, status, detail, updated_at)
|
||||
SELECT %s, task, 'pending', NULL, NOW()
|
||||
FROM UNNEST(%s::text[]) AS task
|
||||
ON CONFLICT (request_code, task) DO NOTHING
|
||||
""",
|
||||
(request_code, tasks),
|
||||
)
|
||||
|
||||
def _upsert_task(self, conn, request_code: str, task: str, status: str, detail: str | None = None) -> None:
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO access_request_tasks (request_code, task, status, detail, updated_at)
|
||||
VALUES (%s, %s, %s, %s, NOW())
|
||||
ON CONFLICT (request_code, task)
|
||||
DO UPDATE SET status = EXCLUDED.status, detail = EXCLUDED.detail, updated_at = NOW()
|
||||
""",
|
||||
(request_code, task, status, detail),
|
||||
)
|
||||
|
||||
def _task_statuses(self, conn, request_code: str) -> dict[str, str]:
|
||||
rows = conn.execute(
|
||||
"SELECT task, status FROM access_request_tasks WHERE request_code = %s",
|
||||
(request_code,),
|
||||
).fetchall()
|
||||
output: dict[str, str] = {}
|
||||
for row in rows:
|
||||
task = row.get("task") if isinstance(row, dict) else None
|
||||
status = row.get("status") if isinstance(row, dict) else None
|
||||
if isinstance(task, str) and isinstance(status, str):
|
||||
output[task] = status
|
||||
return output
|
||||
|
||||
def _all_tasks_ok(self, conn, request_code: str, tasks: list[str]) -> bool:
|
||||
statuses = self._task_statuses(conn, request_code)
|
||||
for task in tasks:
|
||||
if statuses.get(task) != "ok":
|
||||
return False
|
||||
return True
|
||||
|
||||
def _record_task(self, request_code: str, task: str, status: str, detail: str | None, started: datetime) -> None:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - started).total_seconds()
|
||||
record_task_run(task, status, duration_sec)
|
||||
logger.info(
|
||||
"task run",
|
||||
extra={
|
||||
"event": "task_run",
|
||||
"request_code": request_code,
|
||||
"task": task,
|
||||
"status": status,
|
||||
"duration_sec": round(duration_sec, 3),
|
||||
"detail": detail or "",
|
||||
},
|
||||
)
|
||||
try:
|
||||
self._storage.record_event(
|
||||
"provision_task",
|
||||
{
|
||||
"request_code": request_code,
|
||||
"task": task,
|
||||
"status": status,
|
||||
"duration_sec": round(duration_sec, 3),
|
||||
"detail": detail or "",
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=request_code,
|
||||
task=task,
|
||||
status=status,
|
||||
detail=detail,
|
||||
started_at=started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _task_ok(
|
||||
self,
|
||||
conn,
|
||||
request_code: str,
|
||||
task: str,
|
||||
detail: str | None,
|
||||
started: datetime,
|
||||
) -> None:
|
||||
self._upsert_task(conn, request_code, task, "ok", detail)
|
||||
self._record_task(request_code, task, "ok", detail, started)
|
||||
|
||||
def _task_error(
|
||||
self,
|
||||
conn,
|
||||
request_code: str,
|
||||
task: str,
|
||||
detail: str,
|
||||
started: datetime,
|
||||
) -> None:
|
||||
self._upsert_task(conn, request_code, task, "error", detail)
|
||||
self._record_task(request_code, task, "error", detail, started)
|
||||
|
||||
def _task_pending(
|
||||
self,
|
||||
conn,
|
||||
request_code: str,
|
||||
task: str,
|
||||
detail: str,
|
||||
started: datetime,
|
||||
) -> None:
|
||||
self._upsert_task(conn, request_code, task, "pending", detail)
|
||||
self._record_task(request_code, task, "pending", detail, started)
|
||||
|
||||
def _is_retryable_detail(self, detail: str) -> bool:
|
||||
if not detail:
|
||||
return False
|
||||
detail_lower = detail.lower()
|
||||
match = re.match(r"^http\s+(\d{3})", detail_lower)
|
||||
if match:
|
||||
try:
|
||||
code = int(match.group(1))
|
||||
except ValueError:
|
||||
code = 0
|
||||
if code in _RETRYABLE_HTTP_CODES:
|
||||
return True
|
||||
return any(token in detail_lower for token in _RETRYABLE_TOKENS)
|
||||
|
||||
def _retryable_detail(self, detail: str) -> str:
|
||||
cleaned = detail.strip() if isinstance(detail, str) else ""
|
||||
if not cleaned:
|
||||
return "retryable: temporary failure"
|
||||
return f"retryable: {cleaned}"
|
||||
|
||||
def _task_fail(
|
||||
self,
|
||||
conn,
|
||||
request_code: str,
|
||||
task: str,
|
||||
detail: str,
|
||||
started: datetime,
|
||||
) -> None:
|
||||
detail_lower = detail.lower()
|
||||
if "missing verified email address" in detail_lower or "email not verified" in detail_lower:
|
||||
self._task_pending(conn, request_code, task, "blocked: email not verified", started)
|
||||
return
|
||||
if self._is_retryable_detail(detail):
|
||||
self._task_pending(conn, request_code, task, self._retryable_detail(detail), started)
|
||||
return
|
||||
self._task_error(conn, request_code, task, detail, started)
|
||||
|
||||
def _vaultwarden_rate_limit_detail(self) -> tuple[str, datetime]:
|
||||
retry_at = datetime.now(timezone.utc) + timedelta(
|
||||
seconds=float(settings.vaultwarden_admin_rate_limit_backoff_sec)
|
||||
)
|
||||
retry_iso = retry_at.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
return f"rate limited until {retry_iso}", retry_at
|
||||
|
||||
@staticmethod
|
||||
def _parse_retry_at(detail: str) -> datetime | None:
|
||||
prefix = "rate limited until "
|
||||
if not isinstance(detail, str) or not detail.startswith(prefix):
|
||||
return None
|
||||
ts = detail[len(prefix) :].strip()
|
||||
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
|
||||
try:
|
||||
parsed = datetime.strptime(ts, fmt)
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _vaultwarden_retry_due(self, conn, request_code: str) -> bool:
|
||||
row = conn.execute(
|
||||
"""
|
||||
SELECT status, detail
|
||||
FROM access_request_tasks
|
||||
WHERE request_code = %s AND task = 'vaultwarden_invite'
|
||||
""",
|
||||
(request_code,),
|
||||
).fetchone()
|
||||
if not isinstance(row, dict):
|
||||
return True
|
||||
if row.get("status") != "pending":
|
||||
return True
|
||||
retry_at = self._parse_retry_at(row.get("detail") or "")
|
||||
if not retry_at:
|
||||
return True
|
||||
return datetime.now(timezone.utc) >= retry_at
|
||||
|
||||
@staticmethod
|
||||
def _set_vaultwarden_attrs(username: str, email: str, status: str) -> None:
|
||||
if not username or not email or not status:
|
||||
return
|
||||
try:
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
keycloak_admin.set_user_attribute(username, "vaultwarden_email", email)
|
||||
keycloak_admin.set_user_attribute(username, "vaultwarden_status", status)
|
||||
keycloak_admin.set_user_attribute(username, "vaultwarden_synced_at", now_iso)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def _ready_for_retry(self, ctx: RequestContext) -> bool:
|
||||
if ctx.status != "accounts_building":
|
||||
return True
|
||||
attempted_at = ctx.attempted_at
|
||||
if not isinstance(attempted_at, datetime):
|
||||
return True
|
||||
if attempted_at.tzinfo is None:
|
||||
attempted_at = attempted_at.replace(tzinfo=timezone.utc)
|
||||
age_sec = (datetime.now(timezone.utc) - attempted_at).total_seconds()
|
||||
return age_sec >= settings.provision_retry_cooldown_sec
|
||||
|
||||
def _require_verified_email(self, ctx: RequestContext) -> str:
|
||||
if not isinstance(ctx.email_verified_at, datetime):
|
||||
raise RuntimeError("missing verified email address")
|
||||
email = ctx.contact_email.strip()
|
||||
if not email:
|
||||
raise RuntimeError("missing verified email address")
|
||||
return email
|
||||
|
||||
def _ensure_email_unused(self, email: str, username: str) -> None:
|
||||
existing_email_user = keycloak_admin.find_user_by_email(email)
|
||||
if existing_email_user and (existing_email_user.get("username") or "") != username:
|
||||
raise RuntimeError("email is already associated with an existing Atlas account")
|
||||
|
||||
def _new_user_payload(
|
||||
self,
|
||||
username: str,
|
||||
email: str,
|
||||
mailu_email: str,
|
||||
first_name: str,
|
||||
last_name: str,
|
||||
) -> dict[str, Any]:
|
||||
payload = {
|
||||
"username": username,
|
||||
"enabled": True,
|
||||
"email": email,
|
||||
"emailVerified": True,
|
||||
"requiredActions": [],
|
||||
"attributes": {
|
||||
MAILU_EMAIL_ATTR: [mailu_email],
|
||||
MAILU_ENABLED_ATTR: ["true"],
|
||||
},
|
||||
}
|
||||
if first_name:
|
||||
payload["firstName"] = first_name
|
||||
if last_name:
|
||||
payload["lastName"] = last_name
|
||||
else:
|
||||
payload["lastName"] = username
|
||||
return payload
|
||||
|
||||
def _create_or_fetch_user(self, ctx: RequestContext) -> dict[str, Any]:
|
||||
user = keycloak_admin.find_user(ctx.username)
|
||||
if user:
|
||||
return user
|
||||
email = self._require_verified_email(ctx)
|
||||
self._ensure_email_unused(email, ctx.username)
|
||||
payload = self._new_user_payload(ctx.username, email, ctx.mailu_email, ctx.first_name, ctx.last_name)
|
||||
try:
|
||||
created_id = keycloak_admin.create_user(payload)
|
||||
return keycloak_admin.get_user(created_id)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "create user failed")
|
||||
logger.warning(
|
||||
"keycloak create user failed, checking for existing user",
|
||||
extra={"event": "keycloak_user_fallback", "username": ctx.username, "detail": detail},
|
||||
)
|
||||
user = keycloak_admin.find_user(ctx.username)
|
||||
if user:
|
||||
return user
|
||||
user = keycloak_admin.find_user_by_email(email)
|
||||
if user:
|
||||
return user
|
||||
raise
|
||||
|
||||
def _fetch_full_user(self, user_id: str, fallback: dict[str, Any]) -> dict[str, Any]:
|
||||
try:
|
||||
return keycloak_admin.get_user(user_id)
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
def _strip_totp_action(self, user_id: str, full_user: dict[str, Any]) -> None:
|
||||
actions = full_user.get("requiredActions")
|
||||
if not isinstance(actions, list) or "CONFIGURE_TOTP" not in actions:
|
||||
return
|
||||
new_actions = [action for action in actions if action != "CONFIGURE_TOTP"]
|
||||
keycloak_admin.update_user_safe(user_id, {"requiredActions": new_actions})
|
||||
|
||||
def _ensure_contact_email(self, ctx: RequestContext, full_user: dict[str, Any]) -> None:
|
||||
email_value = full_user.get("email")
|
||||
if isinstance(email_value, str) and email_value.strip():
|
||||
return
|
||||
if isinstance(ctx.email_verified_at, datetime) and ctx.contact_email.strip():
|
||||
keycloak_admin.update_user_safe(
|
||||
ctx.user_id,
|
||||
{"email": ctx.contact_email.strip(), "emailVerified": True},
|
||||
)
|
||||
|
||||
def _ensure_mailu_attrs(self, ctx: RequestContext, full_user: dict[str, Any]) -> None:
|
||||
attrs = full_user.get("attributes") or {}
|
||||
if not isinstance(attrs, dict):
|
||||
return
|
||||
existing = _extract_attr(attrs, MAILU_EMAIL_ATTR)
|
||||
if existing:
|
||||
ctx.mailu_email = existing
|
||||
else:
|
||||
ctx.mailu_email = f"{ctx.username}@{settings.mailu_domain}"
|
||||
keycloak_admin.set_user_attribute(ctx.username, MAILU_EMAIL_ATTR, ctx.mailu_email)
|
||||
enabled_value = _extract_attr(attrs, MAILU_ENABLED_ATTR)
|
||||
if enabled_value.lower() not in {"1", "true", "yes", "y", "on"}:
|
||||
keycloak_admin.set_user_attribute(ctx.username, MAILU_ENABLED_ATTR, "true")
|
||||
|
||||
def _sync_user_profile(self, ctx: RequestContext, user: dict[str, Any]) -> None:
|
||||
try:
|
||||
full_user = self._fetch_full_user(ctx.user_id, user)
|
||||
self._strip_totp_action(ctx.user_id, full_user)
|
||||
self._ensure_contact_email(ctx, full_user)
|
||||
self._ensure_mailu_attrs(ctx, full_user)
|
||||
except Exception:
|
||||
ctx.mailu_email = f"{ctx.username}@{settings.mailu_domain}"
|
||||
|
||||
def _ensure_keycloak_user(self, conn, ctx: RequestContext) -> bool:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
user = self._create_or_fetch_user(ctx)
|
||||
ctx.user_id = str((user or {}).get("id") or "")
|
||||
if not ctx.user_id:
|
||||
raise RuntimeError("user id missing")
|
||||
self._sync_user_profile(ctx, user)
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_user", None, start)
|
||||
return True
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to ensure user")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_user", detail, start)
|
||||
return False
|
||||
|
||||
def _ensure_keycloak_password(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
should_reset = ctx.status == "accounts_building" and ctx.revealed_at is None
|
||||
password_value: str | None = None
|
||||
|
||||
if should_reset:
|
||||
if isinstance(ctx.initial_password, str) and ctx.initial_password:
|
||||
password_value = ctx.initial_password
|
||||
elif ctx.initial_password is None:
|
||||
password_value = random_password(20)
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET initial_password = %s
|
||||
WHERE request_code = %s AND initial_password IS NULL
|
||||
""",
|
||||
(password_value, ctx.request_code),
|
||||
)
|
||||
ctx.initial_password = password_value
|
||||
|
||||
if password_value:
|
||||
keycloak_admin.reset_password(ctx.user_id, password_value, temporary=False)
|
||||
|
||||
if isinstance(ctx.initial_password, str) and ctx.initial_password:
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_password", None, start)
|
||||
elif ctx.revealed_at is not None:
|
||||
detail = "initial password already revealed"
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_password", detail, start)
|
||||
else:
|
||||
raise RuntimeError("initial password missing")
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to set password")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_password", detail, start)
|
||||
|
||||
def _ensure_keycloak_groups(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
approved_flags = [flag for flag in ctx.approval_flags if flag in settings.allowed_flag_groups]
|
||||
groups = list(dict.fromkeys(settings.default_user_groups + approved_flags))
|
||||
for group_name in groups:
|
||||
gid = keycloak_admin.get_group_id(group_name)
|
||||
if not gid:
|
||||
raise RuntimeError(f"group missing: {group_name}")
|
||||
keycloak_admin.add_user_to_group(ctx.user_id, gid)
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_groups", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to add groups")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_groups", detail, start)
|
||||
|
||||
def _ensure_mailu_app_password(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
existing = _extract_attr(attrs, MAILU_APP_PASSWORD_ATTR)
|
||||
if not existing:
|
||||
keycloak_admin.set_user_attribute(ctx.username, MAILU_APP_PASSWORD_ATTR, random_password())
|
||||
self._task_ok(conn, ctx.request_code, "mailu_app_password", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to set mail password")
|
||||
self._task_fail(conn, ctx.request_code, "mailu_app_password", detail, start)
|
||||
|
||||
def _sync_mailu(self, conn, ctx: RequestContext) -> bool:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not mailu.ready():
|
||||
detail = "mailu not configured"
|
||||
self._task_ok(conn, ctx.request_code, "mailu_sync", detail, start)
|
||||
return True
|
||||
mailu.sync(reason="ariadne_access_approve", force=True)
|
||||
mailbox_ready = mailu.wait_for_mailbox(
|
||||
ctx.mailu_email,
|
||||
settings.mailu_mailbox_wait_timeout_sec,
|
||||
)
|
||||
if not mailbox_ready:
|
||||
raise RuntimeError("mailbox not ready")
|
||||
self._task_ok(conn, ctx.request_code, "mailu_sync", None, start)
|
||||
return True
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to sync mailu")
|
||||
self._task_fail(conn, ctx.request_code, "mailu_sync", detail, start)
|
||||
return False
|
||||
|
||||
def _sync_nextcloud_mail(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not settings.nextcloud_namespace:
|
||||
detail = "sync disabled"
|
||||
self._task_ok(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
return
|
||||
result = nextcloud.sync_mail(ctx.username, wait=True)
|
||||
if isinstance(result, dict) and result.get("status") == "ok":
|
||||
self._task_ok(conn, ctx.request_code, "nextcloud_mail_sync", None, start)
|
||||
return
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
summary = result.get("summary") if isinstance(result, dict) else None
|
||||
detail = ""
|
||||
if summary is not None:
|
||||
detail = getattr(summary, "detail", "") or ""
|
||||
if not detail and isinstance(result, dict):
|
||||
detail = str(result.get("detail") or "")
|
||||
detail = detail or str(status_val)
|
||||
self._task_fail(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to sync nextcloud")
|
||||
self._task_fail(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
|
||||
def _ensure_wger_account(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
wger_password = _extract_attr(attrs, WGER_PASSWORD_ATTR)
|
||||
wger_password_updated_at = _extract_attr(attrs, WGER_PASSWORD_UPDATED_ATTR)
|
||||
|
||||
if not wger_password:
|
||||
wger_password = random_password(20)
|
||||
keycloak_admin.set_user_attribute(ctx.username, WGER_PASSWORD_ATTR, wger_password)
|
||||
|
||||
if not wger_password_updated_at:
|
||||
result = wger.sync_user(ctx.username, ctx.mailu_email, wger_password, wait=True)
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
detail = result.get("detail") if isinstance(result, dict) else ""
|
||||
detail = detail or f"wger sync {status_val}"
|
||||
raise RuntimeError(detail)
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
keycloak_admin.set_user_attribute(ctx.username, WGER_PASSWORD_UPDATED_ATTR, now_iso)
|
||||
|
||||
self._task_ok(conn, ctx.request_code, "wger_account", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision wger")
|
||||
self._task_fail(conn, ctx.request_code, "wger_account", detail, start)
|
||||
|
||||
def _ensure_firefly_account(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
firefly_password = _extract_attr(attrs, FIREFLY_PASSWORD_ATTR)
|
||||
firefly_password_updated_at = _extract_attr(attrs, FIREFLY_PASSWORD_UPDATED_ATTR)
|
||||
|
||||
if not firefly_password:
|
||||
firefly_password = random_password(24)
|
||||
keycloak_admin.set_user_attribute(ctx.username, FIREFLY_PASSWORD_ATTR, firefly_password)
|
||||
|
||||
if not firefly_password_updated_at:
|
||||
result = firefly.sync_user(ctx.mailu_email, firefly_password, wait=True)
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
raise RuntimeError(f"firefly sync {status_val}")
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
keycloak_admin.set_user_attribute(ctx.username, FIREFLY_PASSWORD_UPDATED_ATTR, now_iso)
|
||||
|
||||
self._task_ok(conn, ctx.request_code, "firefly_account", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision firefly")
|
||||
self._task_fail(conn, ctx.request_code, "firefly_account", detail, start)
|
||||
|
||||
def _handle_vaultwarden_grandfathered(self, conn, ctx: RequestContext, start: datetime) -> None:
|
||||
lookup = vaultwarden.find_user_by_email(ctx.contact_email)
|
||||
if lookup.status == "rate_limited":
|
||||
detail, _ = self._vaultwarden_rate_limit_detail()
|
||||
self._task_pending(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.contact_email, "rate_limited")
|
||||
return
|
||||
if lookup.ok and lookup.status == "present":
|
||||
self._task_ok(conn, ctx.request_code, "vaultwarden_invite", "grandfathered", start)
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.contact_email, "grandfathered")
|
||||
return
|
||||
if lookup.ok and lookup.status == "missing":
|
||||
self._task_error(
|
||||
conn,
|
||||
ctx.request_code,
|
||||
"vaultwarden_invite",
|
||||
"vaultwarden account not found for recovery email",
|
||||
start,
|
||||
)
|
||||
return
|
||||
detail = lookup.detail or lookup.status
|
||||
self._task_fail(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
def _ensure_vaultwarden_invite(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not self._vaultwarden_retry_due(conn, ctx.request_code):
|
||||
return
|
||||
if VAULTWARDEN_GRANDFATHERED_FLAG in ctx.approval_flags:
|
||||
self._handle_vaultwarden_grandfathered(conn, ctx, start)
|
||||
return
|
||||
if not mailu.wait_for_mailbox(ctx.mailu_email, settings.mailu_mailbox_wait_timeout_sec):
|
||||
try:
|
||||
mailu.sync(reason="ariadne_vaultwarden_retry", force=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not mailu.wait_for_mailbox(ctx.mailu_email, settings.mailu_mailbox_wait_timeout_sec):
|
||||
raise RuntimeError("mailbox not ready")
|
||||
|
||||
result = vaultwarden.invite_user(ctx.mailu_email)
|
||||
if result.ok:
|
||||
self._task_ok(conn, ctx.request_code, "vaultwarden_invite", result.status, start)
|
||||
elif result.status == "rate_limited":
|
||||
detail, _ = self._vaultwarden_rate_limit_detail()
|
||||
self._task_pending(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
else:
|
||||
detail = result.detail or result.status
|
||||
self._task_error(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
status = result.status if result.status != "rate_limited" else "rate_limited"
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.mailu_email, status)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision vaultwarden")
|
||||
self._task_fail(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
def _send_welcome_email(self, request_code: str, username: str, contact_email: str) -> None:
|
||||
if not settings.welcome_email_enabled:
|
||||
return
|
||||
if not contact_email:
|
||||
return
|
||||
try:
|
||||
row = self._db.fetchone(
|
||||
"SELECT welcome_email_sent_at FROM access_requests WHERE request_code = %s",
|
||||
(request_code,),
|
||||
)
|
||||
if row and row.get("welcome_email_sent_at"):
|
||||
return
|
||||
onboarding_url = f"{settings.portal_public_base_url}/onboarding?code={request_code}"
|
||||
mailer.send_welcome(contact_email, request_code, onboarding_url, username=username)
|
||||
self._storage.mark_welcome_sent(request_code)
|
||||
except MailerError:
|
||||
return
|
||||
__all__ = [
|
||||
"FIREFLY_PASSWORD_ATTR",
|
||||
"FIREFLY_PASSWORD_UPDATED_ATTR",
|
||||
"MAILU_APP_PASSWORD_ATTR",
|
||||
"MAILU_EMAIL_ATTR",
|
||||
"MAILU_ENABLED_ATTR",
|
||||
"MailerError",
|
||||
"ProvisionOutcome",
|
||||
"ProvisioningManager",
|
||||
"RequestContext",
|
||||
"VAULTWARDEN_GRANDFATHERED_FLAG",
|
||||
"WGER_PASSWORD_ATTR",
|
||||
"WGER_PASSWORD_UPDATED_ATTR",
|
||||
"_extract_attr",
|
||||
]
|
||||
|
||||
401
ariadne/manager/provisioning_accounts.py
Normal file
401
ariadne/manager/provisioning_accounts.py
Normal file
@ -0,0 +1,401 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from ..services.mailer import MailerError
|
||||
from ..utils.errors import safe_error_detail
|
||||
from .provisioning_protocol import (
|
||||
FIREFLY_PASSWORD_ATTR,
|
||||
FIREFLY_PASSWORD_UPDATED_ATTR,
|
||||
MAILU_APP_PASSWORD_ATTR,
|
||||
MAILU_EMAIL_ATTR,
|
||||
MAILU_ENABLED_ATTR,
|
||||
VAULTWARDEN_GRANDFATHERED_FLAG,
|
||||
WGER_PASSWORD_ATTR,
|
||||
WGER_PASSWORD_UPDATED_ATTR,
|
||||
RequestContext,
|
||||
_extract_attr,
|
||||
)
|
||||
|
||||
|
||||
class _ProvisioningAccountsMixin:
|
||||
def _set_vaultwarden_attrs(self, username: str, email: str, status: str) -> None:
|
||||
if not username or not email or not status:
|
||||
return
|
||||
try:
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
self._keycloak_admin.set_user_attribute(username, "vaultwarden_email", email)
|
||||
self._keycloak_admin.set_user_attribute(username, "vaultwarden_status", status)
|
||||
self._keycloak_admin.set_user_attribute(username, "vaultwarden_synced_at", now_iso)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def _ready_for_retry(self, ctx: RequestContext) -> bool:
|
||||
if ctx.status != "accounts_building":
|
||||
return True
|
||||
attempted_at = ctx.attempted_at
|
||||
if not isinstance(attempted_at, datetime):
|
||||
return True
|
||||
if attempted_at.tzinfo is None:
|
||||
attempted_at = attempted_at.replace(tzinfo=timezone.utc)
|
||||
age_sec = (datetime.now(timezone.utc) - attempted_at).total_seconds()
|
||||
return age_sec >= self._settings.provision_retry_cooldown_sec
|
||||
|
||||
def _require_verified_email(self, ctx: RequestContext) -> str:
|
||||
if not isinstance(ctx.email_verified_at, datetime):
|
||||
raise RuntimeError("missing verified email address")
|
||||
email = ctx.contact_email.strip()
|
||||
if not email:
|
||||
raise RuntimeError("missing verified email address")
|
||||
return email
|
||||
|
||||
def _ensure_email_unused(self, email: str, username: str) -> None:
|
||||
existing_email_user = self._keycloak_admin.find_user_by_email(email)
|
||||
if existing_email_user and (existing_email_user.get("username") or "") != username:
|
||||
raise RuntimeError("email is already associated with an existing Atlas account")
|
||||
|
||||
def _new_user_payload(
|
||||
self,
|
||||
username: str,
|
||||
email: str,
|
||||
mailu_email: str,
|
||||
first_name: str,
|
||||
last_name: str,
|
||||
) -> dict[str, Any]:
|
||||
payload = {
|
||||
"username": username,
|
||||
"enabled": True,
|
||||
"email": email,
|
||||
"emailVerified": True,
|
||||
"requiredActions": [],
|
||||
"attributes": {
|
||||
MAILU_EMAIL_ATTR: [mailu_email],
|
||||
MAILU_ENABLED_ATTR: ["true"],
|
||||
},
|
||||
}
|
||||
if first_name:
|
||||
payload["firstName"] = first_name
|
||||
if last_name:
|
||||
payload["lastName"] = last_name
|
||||
else:
|
||||
payload["lastName"] = username
|
||||
return payload
|
||||
|
||||
def _create_or_fetch_user(self, ctx: RequestContext) -> dict[str, Any]:
|
||||
user = self._keycloak_admin.find_user(ctx.username)
|
||||
if user:
|
||||
return user
|
||||
email = self._require_verified_email(ctx)
|
||||
self._ensure_email_unused(email, ctx.username)
|
||||
payload = self._new_user_payload(ctx.username, email, ctx.mailu_email, ctx.first_name, ctx.last_name)
|
||||
try:
|
||||
created_id = self._keycloak_admin.create_user(payload)
|
||||
return self._keycloak_admin.get_user(created_id)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "create user failed")
|
||||
self._logger.warning(
|
||||
"keycloak create user failed, checking for existing user",
|
||||
extra={"event": "keycloak_user_fallback", "username": ctx.username, "detail": detail},
|
||||
)
|
||||
user = self._keycloak_admin.find_user(ctx.username)
|
||||
if user:
|
||||
return user
|
||||
user = self._keycloak_admin.find_user_by_email(email)
|
||||
if user:
|
||||
return user
|
||||
raise
|
||||
|
||||
def _fetch_full_user(self, user_id: str, fallback: dict[str, Any]) -> dict[str, Any]:
|
||||
try:
|
||||
return self._keycloak_admin.get_user(user_id)
|
||||
except Exception:
|
||||
return fallback
|
||||
|
||||
def _strip_totp_action(self, user_id: str, full_user: dict[str, Any]) -> None:
|
||||
actions = full_user.get("requiredActions")
|
||||
if not isinstance(actions, list) or "CONFIGURE_TOTP" not in actions:
|
||||
return
|
||||
new_actions = [action for action in actions if action != "CONFIGURE_TOTP"]
|
||||
self._keycloak_admin.update_user_safe(user_id, {"requiredActions": new_actions})
|
||||
|
||||
def _ensure_contact_email(self, ctx: RequestContext, full_user: dict[str, Any]) -> None:
|
||||
email_value = full_user.get("email")
|
||||
if isinstance(email_value, str) and email_value.strip():
|
||||
return
|
||||
if isinstance(ctx.email_verified_at, datetime) and ctx.contact_email.strip():
|
||||
self._keycloak_admin.update_user_safe(
|
||||
ctx.user_id,
|
||||
{"email": ctx.contact_email.strip(), "emailVerified": True},
|
||||
)
|
||||
|
||||
def _ensure_mailu_attrs(self, ctx: RequestContext, full_user: dict[str, Any]) -> None:
|
||||
attrs = full_user.get("attributes") or {}
|
||||
if not isinstance(attrs, dict):
|
||||
return
|
||||
existing = _extract_attr(attrs, MAILU_EMAIL_ATTR)
|
||||
if existing:
|
||||
ctx.mailu_email = existing
|
||||
else:
|
||||
ctx.mailu_email = f"{ctx.username}@{self._settings.mailu_domain}"
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, MAILU_EMAIL_ATTR, ctx.mailu_email)
|
||||
enabled_value = _extract_attr(attrs, MAILU_ENABLED_ATTR)
|
||||
if enabled_value.lower() not in {"1", "true", "yes", "y", "on"}:
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, MAILU_ENABLED_ATTR, "true")
|
||||
|
||||
def _sync_user_profile(self, ctx: RequestContext, user: dict[str, Any]) -> None:
|
||||
try:
|
||||
full_user = self._fetch_full_user(ctx.user_id, user)
|
||||
self._strip_totp_action(ctx.user_id, full_user)
|
||||
self._ensure_contact_email(ctx, full_user)
|
||||
self._ensure_mailu_attrs(ctx, full_user)
|
||||
except Exception:
|
||||
ctx.mailu_email = f"{ctx.username}@{self._settings.mailu_domain}"
|
||||
|
||||
def _ensure_keycloak_user(self, conn, ctx: RequestContext) -> bool:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
user = self._create_or_fetch_user(ctx)
|
||||
ctx.user_id = str((user or {}).get("id") or "")
|
||||
if not ctx.user_id:
|
||||
raise RuntimeError("user id missing")
|
||||
self._sync_user_profile(ctx, user)
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_user", None, start)
|
||||
return True
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to ensure user")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_user", detail, start)
|
||||
return False
|
||||
|
||||
def _ensure_keycloak_password(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
should_reset = ctx.status == "accounts_building" and ctx.revealed_at is None
|
||||
password_value: str | None = None
|
||||
|
||||
if should_reset:
|
||||
if isinstance(ctx.initial_password, str) and ctx.initial_password:
|
||||
password_value = ctx.initial_password
|
||||
elif ctx.initial_password is None:
|
||||
password_value = self._random_password(20)
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE access_requests
|
||||
SET initial_password = %s
|
||||
WHERE request_code = %s AND initial_password IS NULL
|
||||
""",
|
||||
(password_value, ctx.request_code),
|
||||
)
|
||||
ctx.initial_password = password_value
|
||||
|
||||
if password_value:
|
||||
self._keycloak_admin.reset_password(ctx.user_id, password_value, temporary=False)
|
||||
|
||||
if isinstance(ctx.initial_password, str) and ctx.initial_password:
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_password", None, start)
|
||||
elif ctx.revealed_at is not None:
|
||||
detail = "initial password already revealed"
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_password", detail, start)
|
||||
else:
|
||||
raise RuntimeError("initial password missing")
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to set password")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_password", detail, start)
|
||||
|
||||
def _ensure_keycloak_groups(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
approved_flags = [flag for flag in ctx.approval_flags if flag in self._settings.allowed_flag_groups]
|
||||
groups = list(dict.fromkeys(self._settings.default_user_groups + approved_flags))
|
||||
for group_name in groups:
|
||||
gid = self._keycloak_admin.get_group_id(group_name)
|
||||
if not gid:
|
||||
raise RuntimeError(f"group missing: {group_name}")
|
||||
self._keycloak_admin.add_user_to_group(ctx.user_id, gid)
|
||||
self._task_ok(conn, ctx.request_code, "keycloak_groups", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to add groups")
|
||||
self._task_fail(conn, ctx.request_code, "keycloak_groups", detail, start)
|
||||
|
||||
def _ensure_mailu_app_password(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = self._keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
existing = _extract_attr(attrs, MAILU_APP_PASSWORD_ATTR)
|
||||
if not existing:
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, MAILU_APP_PASSWORD_ATTR, self._random_password())
|
||||
self._task_ok(conn, ctx.request_code, "mailu_app_password", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to set mail password")
|
||||
self._task_fail(conn, ctx.request_code, "mailu_app_password", detail, start)
|
||||
|
||||
def _sync_mailu(self, conn, ctx: RequestContext) -> bool:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not self._mailu.ready():
|
||||
detail = "mailu not configured"
|
||||
self._task_ok(conn, ctx.request_code, "mailu_sync", detail, start)
|
||||
return True
|
||||
self._mailu.sync(reason="ariadne_access_approve", force=True)
|
||||
mailbox_ready = self._mailu.wait_for_mailbox(
|
||||
ctx.mailu_email,
|
||||
self._settings.mailu_mailbox_wait_timeout_sec,
|
||||
)
|
||||
if not mailbox_ready:
|
||||
raise RuntimeError("mailbox not ready")
|
||||
self._task_ok(conn, ctx.request_code, "mailu_sync", None, start)
|
||||
return True
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to sync mailu")
|
||||
self._task_fail(conn, ctx.request_code, "mailu_sync", detail, start)
|
||||
return False
|
||||
|
||||
def _sync_nextcloud_mail(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not self._settings.nextcloud_namespace:
|
||||
detail = "sync disabled"
|
||||
self._task_ok(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
return
|
||||
result = self._nextcloud.sync_mail(ctx.username, wait=True)
|
||||
if isinstance(result, dict) and result.get("status") == "ok":
|
||||
self._task_ok(conn, ctx.request_code, "nextcloud_mail_sync", None, start)
|
||||
return
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
summary = result.get("summary") if isinstance(result, dict) else None
|
||||
detail = ""
|
||||
if summary is not None:
|
||||
detail = getattr(summary, "detail", "") or ""
|
||||
if not detail and isinstance(result, dict):
|
||||
detail = str(result.get("detail") or "")
|
||||
detail = detail or str(status_val)
|
||||
self._task_fail(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to sync nextcloud")
|
||||
self._task_fail(conn, ctx.request_code, "nextcloud_mail_sync", detail, start)
|
||||
|
||||
def _ensure_wger_account(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = self._keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
wger_password = _extract_attr(attrs, WGER_PASSWORD_ATTR)
|
||||
wger_password_updated_at = _extract_attr(attrs, WGER_PASSWORD_UPDATED_ATTR)
|
||||
|
||||
if not wger_password:
|
||||
wger_password = self._random_password(20)
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, WGER_PASSWORD_ATTR, wger_password)
|
||||
|
||||
if not wger_password_updated_at:
|
||||
result = self._wger.sync_user(ctx.username, ctx.mailu_email, wger_password, wait=True)
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
detail = result.get("detail") if isinstance(result, dict) else ""
|
||||
detail = detail or f"wger sync {status_val}"
|
||||
raise RuntimeError(detail)
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, WGER_PASSWORD_UPDATED_ATTR, now_iso)
|
||||
|
||||
self._task_ok(conn, ctx.request_code, "wger_account", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision wger")
|
||||
self._task_fail(conn, ctx.request_code, "wger_account", detail, start)
|
||||
|
||||
def _ensure_firefly_account(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
full = self._keycloak_admin.get_user(ctx.user_id)
|
||||
attrs = full.get("attributes") or {}
|
||||
firefly_password = _extract_attr(attrs, FIREFLY_PASSWORD_ATTR)
|
||||
firefly_password_updated_at = _extract_attr(attrs, FIREFLY_PASSWORD_UPDATED_ATTR)
|
||||
|
||||
if not firefly_password:
|
||||
firefly_password = self._random_password(24)
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, FIREFLY_PASSWORD_ATTR, firefly_password)
|
||||
|
||||
if not firefly_password_updated_at:
|
||||
result = self._firefly.sync_user(ctx.mailu_email, firefly_password, wait=True)
|
||||
status_val = result.get("status") if isinstance(result, dict) else "error"
|
||||
if status_val != "ok":
|
||||
raise RuntimeError(f"firefly sync {status_val}")
|
||||
now_iso = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
self._keycloak_admin.set_user_attribute(ctx.username, FIREFLY_PASSWORD_UPDATED_ATTR, now_iso)
|
||||
|
||||
self._task_ok(conn, ctx.request_code, "firefly_account", None, start)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision firefly")
|
||||
self._task_fail(conn, ctx.request_code, "firefly_account", detail, start)
|
||||
|
||||
def _handle_vaultwarden_grandfathered(self, conn, ctx: RequestContext, start: datetime) -> None:
|
||||
lookup = self._vaultwarden.find_user_by_email(ctx.contact_email)
|
||||
if lookup.status == "rate_limited":
|
||||
detail, _ = self._vaultwarden_rate_limit_detail()
|
||||
self._task_pending(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.contact_email, "rate_limited")
|
||||
return
|
||||
if lookup.ok and lookup.status == "present":
|
||||
self._task_ok(conn, ctx.request_code, "vaultwarden_invite", "grandfathered", start)
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.contact_email, "grandfathered")
|
||||
return
|
||||
if lookup.ok and lookup.status == "missing":
|
||||
self._task_error(
|
||||
conn,
|
||||
ctx.request_code,
|
||||
"vaultwarden_invite",
|
||||
"vaultwarden account not found for recovery email",
|
||||
start,
|
||||
)
|
||||
return
|
||||
detail = lookup.detail or lookup.status
|
||||
self._task_fail(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
def _ensure_vaultwarden_invite(self, conn, ctx: RequestContext) -> None:
|
||||
start = datetime.now(timezone.utc)
|
||||
try:
|
||||
if not self._vaultwarden_retry_due(conn, ctx.request_code):
|
||||
return
|
||||
if VAULTWARDEN_GRANDFATHERED_FLAG in ctx.approval_flags:
|
||||
self._handle_vaultwarden_grandfathered(conn, ctx, start)
|
||||
return
|
||||
if not self._mailu.wait_for_mailbox(ctx.mailu_email, self._settings.mailu_mailbox_wait_timeout_sec):
|
||||
try:
|
||||
self._mailu.sync(reason="ariadne_vaultwarden_retry", force=True)
|
||||
except Exception:
|
||||
pass
|
||||
if not self._mailu.wait_for_mailbox(ctx.mailu_email, self._settings.mailu_mailbox_wait_timeout_sec):
|
||||
raise RuntimeError("mailbox not ready")
|
||||
|
||||
result = self._vaultwarden.invite_user(ctx.mailu_email)
|
||||
if result.ok:
|
||||
self._task_ok(conn, ctx.request_code, "vaultwarden_invite", result.status, start)
|
||||
elif result.status == "rate_limited":
|
||||
detail, _ = self._vaultwarden_rate_limit_detail()
|
||||
self._task_pending(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
else:
|
||||
detail = result.detail or result.status
|
||||
self._task_error(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
status = result.status if result.status != "rate_limited" else "rate_limited"
|
||||
self._set_vaultwarden_attrs(ctx.username, ctx.mailu_email, status)
|
||||
except Exception as exc:
|
||||
detail = safe_error_detail(exc, "failed to provision vaultwarden")
|
||||
self._task_fail(conn, ctx.request_code, "vaultwarden_invite", detail, start)
|
||||
|
||||
def _send_welcome_email(self, request_code: str, username: str, contact_email: str) -> None:
|
||||
if not self._settings.welcome_email_enabled:
|
||||
return
|
||||
if not contact_email:
|
||||
return
|
||||
try:
|
||||
row = self._db.fetchone(
|
||||
"SELECT welcome_email_sent_at FROM access_requests WHERE request_code = %s",
|
||||
(request_code,),
|
||||
)
|
||||
if row and row.get("welcome_email_sent_at"):
|
||||
return
|
||||
onboarding_url = f"{self._settings.portal_public_base_url}/onboarding?code={request_code}"
|
||||
self._mailer.send_welcome(contact_email, request_code, onboarding_url, username=username)
|
||||
self._storage.mark_welcome_sent(request_code)
|
||||
except MailerError:
|
||||
return
|
||||
73
ariadne/manager/provisioning_protocol.py
Normal file
73
ariadne/manager/provisioning_protocol.py
Normal file
@ -0,0 +1,73 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
import hashlib
|
||||
from typing import Any
|
||||
|
||||
MAILU_EMAIL_ATTR = "mailu_email"
|
||||
MAILU_APP_PASSWORD_ATTR = "mailu_app_password"
|
||||
MAILU_ENABLED_ATTR = "mailu_enabled"
|
||||
WGER_PASSWORD_ATTR = "wger_password"
|
||||
WGER_PASSWORD_UPDATED_ATTR = "wger_password_updated_at"
|
||||
FIREFLY_PASSWORD_ATTR = "firefly_password"
|
||||
FIREFLY_PASSWORD_UPDATED_ATTR = "firefly_password_updated_at"
|
||||
VAULTWARDEN_GRANDFATHERED_FLAG = "vaultwarden_grandfathered"
|
||||
|
||||
_RETRYABLE_HTTP_CODES = {429, 500, 502, 503, 504}
|
||||
_RETRYABLE_TOKENS = (
|
||||
"timeout",
|
||||
"temporar",
|
||||
"rate limited",
|
||||
"mailbox not ready",
|
||||
"connection refused",
|
||||
"connection reset",
|
||||
"network is unreachable",
|
||||
"dns",
|
||||
"name resolution",
|
||||
"service unavailable",
|
||||
"bad gateway",
|
||||
"gateway timeout",
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProvisionOutcome:
|
||||
ok: bool
|
||||
status: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class RequestContext:
|
||||
request_code: str
|
||||
username: str
|
||||
first_name: str
|
||||
last_name: str
|
||||
contact_email: str
|
||||
email_verified_at: datetime | None
|
||||
status: str
|
||||
initial_password: str | None
|
||||
revealed_at: datetime | None
|
||||
attempted_at: datetime | None
|
||||
approval_flags: list[str]
|
||||
user_id: str = ""
|
||||
mailu_email: str = ""
|
||||
|
||||
|
||||
def _advisory_lock_id(request_code: str) -> int:
|
||||
digest = hashlib.sha256(request_code.encode("utf-8")).digest()
|
||||
return int.from_bytes(digest[:8], "big", signed=True)
|
||||
|
||||
|
||||
def _extract_attr(attrs: Any, key: str) -> str:
|
||||
if not isinstance(attrs, dict):
|
||||
return ""
|
||||
raw = attrs.get(key)
|
||||
if isinstance(raw, list):
|
||||
for item in raw:
|
||||
if isinstance(item, str) and item.strip():
|
||||
return item.strip()
|
||||
return ""
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return raw.strip()
|
||||
return ""
|
||||
176
ariadne/manager/provisioning_tasks.py
Normal file
176
ariadne/manager/provisioning_tasks.py
Normal file
@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import re
|
||||
|
||||
from ..db.storage import TaskRunRecord
|
||||
from .provisioning_protocol import _RETRYABLE_HTTP_CODES, _RETRYABLE_TOKENS
|
||||
|
||||
|
||||
class _ProvisioningTaskMixin:
|
||||
def _ensure_task_rows(self, conn, request_code: str, tasks: list[str]) -> None:
|
||||
if not tasks:
|
||||
return
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO access_request_tasks (request_code, task, status, detail, updated_at)
|
||||
SELECT %s, task, 'pending', NULL, NOW()
|
||||
FROM UNNEST(%s::text[]) AS task
|
||||
ON CONFLICT (request_code, task) DO NOTHING
|
||||
""",
|
||||
(request_code, tasks),
|
||||
)
|
||||
|
||||
def _upsert_task(self, conn, request_code: str, task: str, status: str, detail: str | None = None) -> None:
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO access_request_tasks (request_code, task, status, detail, updated_at)
|
||||
VALUES (%s, %s, %s, %s, NOW())
|
||||
ON CONFLICT (request_code, task)
|
||||
DO UPDATE SET status = EXCLUDED.status, detail = EXCLUDED.detail, updated_at = NOW()
|
||||
""",
|
||||
(request_code, task, status, detail),
|
||||
)
|
||||
|
||||
def _task_statuses(self, conn, request_code: str) -> dict[str, str]:
|
||||
rows = conn.execute(
|
||||
"SELECT task, status FROM access_request_tasks WHERE request_code = %s",
|
||||
(request_code,),
|
||||
).fetchall()
|
||||
output: dict[str, str] = {}
|
||||
for row in rows:
|
||||
task = row.get("task") if isinstance(row, dict) else None
|
||||
status = row.get("status") if isinstance(row, dict) else None
|
||||
if isinstance(task, str) and isinstance(status, str):
|
||||
output[task] = status
|
||||
return output
|
||||
|
||||
def _all_tasks_ok(self, conn, request_code: str, tasks: list[str]) -> bool:
|
||||
statuses = self._task_statuses(conn, request_code)
|
||||
for task in tasks:
|
||||
if statuses.get(task) != "ok":
|
||||
return False
|
||||
return True
|
||||
|
||||
def _record_task(self, request_code: str, task: str, status: str, detail: str | None, started: datetime) -> None:
|
||||
finished = datetime.now(timezone.utc)
|
||||
duration_sec = (finished - started).total_seconds()
|
||||
self._record_task_run_metric(task, status, duration_sec)
|
||||
self._logger.info(
|
||||
"task run",
|
||||
extra={
|
||||
"event": "task_run",
|
||||
"request_code": request_code,
|
||||
"task": task,
|
||||
"status": status,
|
||||
"duration_sec": round(duration_sec, 3),
|
||||
"detail": detail or "",
|
||||
},
|
||||
)
|
||||
try:
|
||||
self._storage.record_event(
|
||||
"provision_task",
|
||||
{
|
||||
"request_code": request_code,
|
||||
"task": task,
|
||||
"status": status,
|
||||
"duration_sec": round(duration_sec, 3),
|
||||
"detail": detail or "",
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._storage.record_task_run(
|
||||
TaskRunRecord(
|
||||
request_code=request_code,
|
||||
task=task,
|
||||
status=status,
|
||||
detail=detail,
|
||||
started_at=started,
|
||||
finished_at=finished,
|
||||
duration_ms=int(duration_sec * 1000),
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _task_ok(self, conn, request_code: str, task: str, detail: str | None, started: datetime) -> None:
|
||||
self._upsert_task(conn, request_code, task, "ok", detail)
|
||||
self._record_task(request_code, task, "ok", detail, started)
|
||||
|
||||
def _task_error(self, conn, request_code: str, task: str, detail: str, started: datetime) -> None:
|
||||
self._upsert_task(conn, request_code, task, "error", detail)
|
||||
self._record_task(request_code, task, "error", detail, started)
|
||||
|
||||
def _task_pending(self, conn, request_code: str, task: str, detail: str, started: datetime) -> None:
|
||||
self._upsert_task(conn, request_code, task, "pending", detail)
|
||||
self._record_task(request_code, task, "pending", detail, started)
|
||||
|
||||
def _is_retryable_detail(self, detail: str) -> bool:
|
||||
if not detail:
|
||||
return False
|
||||
detail_lower = detail.lower()
|
||||
match = re.match(r"^http\s+(\d{3})", detail_lower)
|
||||
if match:
|
||||
code = int(match.group(1))
|
||||
if code in _RETRYABLE_HTTP_CODES:
|
||||
return True
|
||||
return any(token in detail_lower for token in _RETRYABLE_TOKENS)
|
||||
|
||||
def _retryable_detail(self, detail: str) -> str:
|
||||
cleaned = detail.strip() if isinstance(detail, str) else ""
|
||||
if not cleaned:
|
||||
return "retryable: temporary failure"
|
||||
return f"retryable: {cleaned}"
|
||||
|
||||
def _task_fail(self, conn, request_code: str, task: str, detail: str, started: datetime) -> None:
|
||||
detail_lower = detail.lower()
|
||||
if "missing verified email address" in detail_lower or "email not verified" in detail_lower:
|
||||
self._task_pending(conn, request_code, task, "blocked: email not verified", started)
|
||||
return
|
||||
if self._is_retryable_detail(detail):
|
||||
self._task_pending(conn, request_code, task, self._retryable_detail(detail), started)
|
||||
return
|
||||
self._task_error(conn, request_code, task, detail, started)
|
||||
|
||||
def _vaultwarden_rate_limit_detail(self) -> tuple[str, datetime]:
|
||||
retry_at = datetime.now(timezone.utc) + timedelta(
|
||||
seconds=float(self._settings.vaultwarden_admin_rate_limit_backoff_sec)
|
||||
)
|
||||
retry_iso = retry_at.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
return f"rate limited until {retry_iso}", retry_at
|
||||
|
||||
@staticmethod
|
||||
def _parse_retry_at(detail: str) -> datetime | None:
|
||||
prefix = "rate limited until "
|
||||
if not isinstance(detail, str) or not detail.startswith(prefix):
|
||||
return None
|
||||
ts = detail[len(prefix) :].strip()
|
||||
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
|
||||
try:
|
||||
parsed = datetime.strptime(ts, fmt)
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed
|
||||
except ValueError:
|
||||
continue
|
||||
return None
|
||||
|
||||
def _vaultwarden_retry_due(self, conn, request_code: str) -> bool:
|
||||
row = conn.execute(
|
||||
"""
|
||||
SELECT status, detail
|
||||
FROM access_request_tasks
|
||||
WHERE request_code = %s AND task = 'vaultwarden_invite'
|
||||
""",
|
||||
(request_code,),
|
||||
).fetchone()
|
||||
if not isinstance(row, dict):
|
||||
return True
|
||||
if row.get("status") != "pending":
|
||||
return True
|
||||
retry_at = self._parse_retry_at(row.get("detail") or "")
|
||||
if not retry_at:
|
||||
return True
|
||||
return datetime.now(timezone.utc) >= retry_at
|
||||
@ -72,18 +72,16 @@ CLUSTER_STATE_KUSTOMIZATIONS_NOT_READY = Gauge(
|
||||
|
||||
|
||||
def record_task_run(task: str, status: str, duration_sec: float | None) -> None:
|
||||
"""Increment task counters and duration histograms for one run."""
|
||||
|
||||
TASK_RUNS_TOTAL.labels(task=task, status=status).inc()
|
||||
if duration_sec is not None:
|
||||
TASK_DURATION_SECONDS.labels(task=task, status=status).observe(duration_sec)
|
||||
|
||||
|
||||
def record_schedule_state(
|
||||
task: str,
|
||||
last_run_ts: float | None,
|
||||
last_success_ts: float | None,
|
||||
next_run_ts: float | None,
|
||||
ok: bool | None,
|
||||
) -> None:
|
||||
def record_schedule_state(task: str, last_run_ts: float | None, last_success_ts: float | None, next_run_ts: float | None, ok: bool | None) -> None:
|
||||
"""Publish the latest scheduler timestamps and status for a task."""
|
||||
|
||||
if last_run_ts:
|
||||
SCHEDULE_LAST_RUN_TS.labels(task=task).set(last_run_ts)
|
||||
if last_success_ts:
|
||||
@ -97,17 +95,15 @@ def record_schedule_state(
|
||||
|
||||
|
||||
def set_access_request_counts(counts: dict[str, int]) -> None:
|
||||
"""Set access-request gauges grouped by lifecycle status."""
|
||||
|
||||
for status, count in counts.items():
|
||||
ACCESS_REQUESTS.labels(status=status).set(count)
|
||||
|
||||
|
||||
def set_cluster_state_metrics(
|
||||
collected_at: datetime,
|
||||
nodes_total: int | None,
|
||||
nodes_ready: int | None,
|
||||
pods_running: float | None,
|
||||
kustomizations_not_ready: int | None,
|
||||
) -> None:
|
||||
def set_cluster_state_metrics(collected_at: datetime, nodes_total: int | None, nodes_ready: int | None, pods_running: float | None, kustomizations_not_ready: int | None) -> None:
|
||||
"""Set cluster-state gauges from the most recent collector snapshot."""
|
||||
|
||||
CLUSTER_STATE_LAST_TS.set(collected_at.timestamp())
|
||||
if nodes_total is not None:
|
||||
CLUSTER_STATE_NODES_TOTAL.set(nodes_total)
|
||||
|
||||
@ -24,6 +24,8 @@ def _build_db(dsn: str, application_name: str) -> Database:
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""Run configured Ariadne and portal database migrations."""
|
||||
|
||||
if not settings.ariadne_run_migrations:
|
||||
return
|
||||
|
||||
|
||||
@ -22,6 +22,8 @@ class CronTask:
|
||||
|
||||
|
||||
class CronScheduler:
|
||||
"""Run named cron tasks while recording schedule state and outcomes."""
|
||||
|
||||
def __init__(self, storage: Storage, tick_sec: float = 5.0) -> None:
|
||||
self._storage = storage
|
||||
self._tick_sec = tick_sec
|
||||
@ -41,6 +43,7 @@ class CronScheduler:
|
||||
def start(self) -> None:
|
||||
if self._thread and self._thread.is_alive():
|
||||
return
|
||||
self._hydrate_schedule_metrics()
|
||||
self._stop_event.clear()
|
||||
self._thread = threading.Thread(target=self._run_loop, name="ariadne-scheduler", daemon=True)
|
||||
self._thread.start()
|
||||
@ -83,6 +86,40 @@ class CronScheduler:
|
||||
)
|
||||
time.sleep(self._tick_sec)
|
||||
|
||||
def _hydrate_schedule_metrics(self) -> None:
|
||||
try:
|
||||
states = self._storage.list_schedule_states()
|
||||
except AttributeError:
|
||||
return
|
||||
except Exception as exc:
|
||||
self._logger.warning(
|
||||
"schedule metric hydration failed",
|
||||
extra={"event": "schedule_hydration_error", "detail": str(exc)},
|
||||
)
|
||||
return
|
||||
|
||||
known_tasks = set(self._tasks)
|
||||
for state in states:
|
||||
if state.task_name not in known_tasks:
|
||||
continue
|
||||
last_finished = state.last_finished_at or state.last_started_at
|
||||
last_success = last_finished if state.last_status == "ok" else None
|
||||
if state.last_status == "ok":
|
||||
ok: bool | None = True
|
||||
elif state.last_status == "error":
|
||||
ok = False
|
||||
else:
|
||||
ok = None
|
||||
record_schedule_state(
|
||||
state.task_name,
|
||||
state.last_started_at.timestamp() if state.last_started_at else None,
|
||||
last_success.timestamp() if last_success else None,
|
||||
self._next_run.get(state.task_name).timestamp()
|
||||
if self._next_run.get(state.task_name)
|
||||
else None,
|
||||
ok,
|
||||
)
|
||||
|
||||
def _execute_task(self, task: CronTask) -> None:
|
||||
started = datetime.now(timezone.utc)
|
||||
status = "ok"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
180
ariadne/services/cluster_state_anomalies.py
Normal file
180
ariadne/services/cluster_state_anomalies.py
Normal file
@ -0,0 +1,180 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_relationships import *
|
||||
|
||||
def _severity_rank(value: Any) -> int:
|
||||
if value == "critical":
|
||||
return 0
|
||||
if value == "warning":
|
||||
return 1
|
||||
return 2
|
||||
|
||||
|
||||
def _pvc_pressure_signals(metrics: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
pvc_top = _pvc_top(metrics.get("pvc_usage_top", []))
|
||||
if not pvc_top:
|
||||
return []
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in pvc_top:
|
||||
used = entry.get("used_percent")
|
||||
if not isinstance(used, (int, float)) or used < _PVC_PRESSURE_THRESHOLD:
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"scope": "pvc",
|
||||
"target": f"{entry.get('namespace')}/{entry.get('pvc')}",
|
||||
"metric": "used_percent",
|
||||
"current": used,
|
||||
"severity": "warning" if used < _PVC_CRITICAL_THRESHOLD else "critical",
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _build_anomalies(metrics: dict[str, Any], nodes_summary: dict[str, Any], workloads_health: dict[str, Any], kustomizations: dict[str, Any], events: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
anomalies: list[dict[str, Any]] = []
|
||||
_append_pod_anomalies(anomalies, metrics)
|
||||
_append_workload_anomalies(anomalies, workloads_health)
|
||||
_append_flux_anomalies(anomalies, kustomizations)
|
||||
_append_job_failure_anomalies(anomalies, metrics)
|
||||
_append_pvc_anomalies(anomalies, metrics)
|
||||
_append_node_anomalies(anomalies, nodes_summary)
|
||||
_append_event_anomalies(anomalies, events)
|
||||
return anomalies
|
||||
|
||||
|
||||
def _append_pod_anomalies(anomalies: list[dict[str, Any]], metrics: dict[str, Any]) -> None:
|
||||
pods_pending = metrics.get("pods_pending") or 0
|
||||
pods_failed = metrics.get("pods_failed") or 0
|
||||
if pods_pending:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "pods_pending",
|
||||
"severity": "warning",
|
||||
"summary": f"{int(pods_pending)} pods pending",
|
||||
}
|
||||
)
|
||||
if pods_failed:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "pods_failed",
|
||||
"severity": "critical",
|
||||
"summary": f"{int(pods_failed)} pods failed",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _append_workload_anomalies(anomalies: list[dict[str, Any]], workloads_health: dict[str, Any]) -> None:
|
||||
for key in ("deployments", "statefulsets", "daemonsets"):
|
||||
entry = workloads_health.get(key) if isinstance(workloads_health.get(key), dict) else {}
|
||||
not_ready = entry.get("not_ready") or 0
|
||||
if not_ready:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": f"{key}_not_ready",
|
||||
"severity": "warning",
|
||||
"summary": f"{int(not_ready)} {key} not ready",
|
||||
"items": entry.get("items"),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _append_flux_anomalies(anomalies: list[dict[str, Any]], kustomizations: dict[str, Any]) -> None:
|
||||
flux_not_ready = (kustomizations or {}).get("not_ready") or 0
|
||||
if flux_not_ready:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "flux_not_ready",
|
||||
"severity": "warning",
|
||||
"summary": f"{int(flux_not_ready)} Flux kustomizations not ready",
|
||||
"items": (kustomizations or {}).get("items"),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _append_job_failure_anomalies(anomalies: list[dict[str, Any]], metrics: dict[str, Any]) -> None:
|
||||
job_failures = metrics.get("job_failures_24h") or []
|
||||
job_failures = [
|
||||
entry for entry in job_failures if isinstance(entry, dict) and (entry.get("value") or 0) > 0
|
||||
]
|
||||
if job_failures:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "job_failures_24h",
|
||||
"severity": "warning",
|
||||
"summary": "Job failures in last 24h",
|
||||
"items": job_failures[:5],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _append_pvc_anomalies(anomalies: list[dict[str, Any]], metrics: dict[str, Any]) -> None:
|
||||
pvc_pressure = _pvc_pressure_entries(metrics)
|
||||
if pvc_pressure:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "pvc_pressure",
|
||||
"severity": "warning",
|
||||
"summary": f"PVCs above {_PVC_PRESSURE_THRESHOLD:.0f}% usage",
|
||||
"items": pvc_pressure[:5],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _pvc_pressure_entries(metrics: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
pvc_top = _pvc_top(metrics.get("pvc_usage_top") or [])
|
||||
return [
|
||||
entry
|
||||
for entry in pvc_top
|
||||
if isinstance(entry, dict)
|
||||
and isinstance(entry.get("used_percent"), (int, float))
|
||||
and float(entry.get("used_percent") or 0) >= _PVC_PRESSURE_THRESHOLD
|
||||
]
|
||||
|
||||
|
||||
def _append_node_anomalies(anomalies: list[dict[str, Any]], nodes_summary: dict[str, Any]) -> None:
|
||||
if not nodes_summary:
|
||||
return
|
||||
pressure_nodes = nodes_summary.get("pressure_nodes") or {}
|
||||
flagged = [
|
||||
name for names in pressure_nodes.values() if isinstance(names, list) for name in names if name
|
||||
]
|
||||
if flagged:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "node_pressure",
|
||||
"severity": "warning",
|
||||
"summary": f"{len(flagged)} nodes report pressure",
|
||||
"items": sorted(set(flagged)),
|
||||
}
|
||||
)
|
||||
unschedulable = nodes_summary.get("unschedulable_nodes") or []
|
||||
if unschedulable:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "unschedulable_nodes",
|
||||
"severity": "info",
|
||||
"summary": f"{len(unschedulable)} nodes unschedulable",
|
||||
"items": unschedulable,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def _append_event_anomalies(anomalies: list[dict[str, Any]], events: dict[str, Any]) -> None:
|
||||
if not events:
|
||||
return
|
||||
warnings = events.get("warnings_total") or 0
|
||||
if warnings:
|
||||
anomalies.append(
|
||||
{
|
||||
"kind": "event_warnings",
|
||||
"severity": "info",
|
||||
"summary": f"{int(warnings)} warning events",
|
||||
"items": events.get("warnings") or [],
|
||||
}
|
||||
)
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
117
ariadne/services/cluster_state_attention.py
Normal file
117
ariadne/services/cluster_state_attention.py
Normal file
@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_anomalies import *
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_health import *
|
||||
|
||||
def _node_attention_score(node: dict[str, Any]) -> tuple[float, list[str]]:
|
||||
score = 0.0
|
||||
reasons: list[str] = []
|
||||
disk = node.get("disk")
|
||||
if isinstance(disk, (int, float)) and disk >= _NODE_DISK_ALERT:
|
||||
score += 3 + (disk - _NODE_DISK_ALERT) / 10
|
||||
reasons.append(f"disk {disk:.1f}%")
|
||||
cpu = node.get("cpu")
|
||||
if isinstance(cpu, (int, float)) and cpu >= _NODE_CPU_ALERT:
|
||||
score += 2 + (cpu - _NODE_CPU_ALERT) / 20
|
||||
reasons.append(f"cpu {cpu:.1f}%")
|
||||
ram = node.get("ram")
|
||||
if isinstance(ram, (int, float)) and ram >= _NODE_RAM_ALERT:
|
||||
score += 2 + (ram - _NODE_RAM_ALERT) / 20
|
||||
reasons.append(f"ram {ram:.1f}%")
|
||||
baseline = node.get("baseline") if isinstance(node.get("baseline"), dict) else {}
|
||||
for key, label, multiplier in (("net", "net", _NET_SPIKE_MULTIPLIER), ("io", "io", _IO_SPIKE_MULTIPLIER)):
|
||||
current = node.get(key)
|
||||
base = baseline.get(key) if isinstance(baseline.get(key), dict) else {}
|
||||
base_max = base.get("max")
|
||||
if isinstance(current, (int, float)) and isinstance(base_max, (int, float)) and base_max > 0:
|
||||
if current > base_max * multiplier:
|
||||
score += 1.5
|
||||
reasons.append(f"{label} {current:.2f} > {multiplier:.1f}x baseline")
|
||||
pressure = node.get("pressure_flags") if isinstance(node.get("pressure_flags"), list) else []
|
||||
if pressure:
|
||||
score += 2
|
||||
reasons.append("pressure flags")
|
||||
return score, reasons
|
||||
|
||||
|
||||
def _node_attention_entries(node_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
entries: list[dict[str, Any]] = []
|
||||
for node in node_context:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("node")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
score, reasons = _node_attention_score(node)
|
||||
if score > 0:
|
||||
entries.append(
|
||||
{
|
||||
"kind": "node",
|
||||
"target": name,
|
||||
"score": round(score, 2),
|
||||
"reasons": reasons,
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def _pvc_attention_entries(metrics: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
entries: list[dict[str, Any]] = []
|
||||
for item in _pvc_pressure_entries(metrics):
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
used = float(item.get("used_percent") or 0)
|
||||
entries.append(
|
||||
{
|
||||
"kind": "pvc",
|
||||
"target": f"{item.get('namespace')}/{item.get('pvc')}",
|
||||
"score": round(1 + (used - _PVC_PRESSURE_THRESHOLD) / 10, 2),
|
||||
"reasons": [f"usage {used:.1f}%"],
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def _pod_attention_entries(pod_issues: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
entries: list[dict[str, Any]] = []
|
||||
pending = pod_issues.get("pending_over_15m") or 0
|
||||
if pending:
|
||||
entries.append(
|
||||
{
|
||||
"kind": "pods",
|
||||
"target": "pending",
|
||||
"score": float(pending),
|
||||
"reasons": [f"{int(pending)} pending >15m"],
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def _workload_attention_entries(workloads_health: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
entries: list[dict[str, Any]] = []
|
||||
for item in _workload_not_ready_items(workloads_health)[:5]:
|
||||
entries.append(
|
||||
{
|
||||
"kind": "workload",
|
||||
"target": f"{item.get('namespace')}/{item.get('name')}",
|
||||
"score": 2.0,
|
||||
"reasons": [f"{item.get('ready')}/{item.get('desired')} ready"],
|
||||
}
|
||||
)
|
||||
return entries
|
||||
|
||||
|
||||
def _build_attention_ranked(metrics: dict[str, Any], node_context: list[dict[str, Any]], pod_issues: dict[str, Any], workloads_health: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
entries = (
|
||||
_node_attention_entries(node_context)
|
||||
+ _pvc_attention_entries(metrics)
|
||||
+ _pod_attention_entries(pod_issues)
|
||||
+ _workload_attention_entries(workloads_health)
|
||||
)
|
||||
entries.sort(key=lambda item: (-(item.get("score") or 0), item.get("kind") or "", item.get("target") or ""))
|
||||
return entries[:5]
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
121
ariadne/services/cluster_state_contract.py
Normal file
121
ariadne/services/cluster_state_contract.py
Normal file
@ -0,0 +1,121 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
_VALUE_PAIR_LEN = 2
|
||||
_RATE_WINDOW = "5m"
|
||||
_RESTARTS_WINDOW = "1h"
|
||||
_BASELINE_WINDOW = "24h"
|
||||
_TREND_WINDOWS = ("1h", "6h", "24h")
|
||||
_TREND_NODE_LIMIT = 30
|
||||
_TREND_NAMESPACE_LIMIT = 20
|
||||
_TREND_PVC_LIMIT = 10
|
||||
_TREND_JOB_LIMIT = 10
|
||||
_TREND_POD_LIMIT = 15
|
||||
_NODE_DISK_ALERT = 80.0
|
||||
_NODE_CPU_ALERT = 80.0
|
||||
_NODE_RAM_ALERT = 80.0
|
||||
_NET_SPIKE_MULTIPLIER = 2.0
|
||||
_IO_SPIKE_MULTIPLIER = 2.0
|
||||
_NODE_UNAME_LABEL = 'node_uname_info{nodename!=""}'
|
||||
_WORKLOAD_LABEL_KEYS = (
|
||||
"app.kubernetes.io/name",
|
||||
"app",
|
||||
"k8s-app",
|
||||
"app.kubernetes.io/instance",
|
||||
"release",
|
||||
)
|
||||
_SYSTEM_NAMESPACES = {
|
||||
"kube-system",
|
||||
"kube-public",
|
||||
"kube-node-lease",
|
||||
"flux-system",
|
||||
"monitoring",
|
||||
"logging",
|
||||
"traefik",
|
||||
"cert-manager",
|
||||
"maintenance",
|
||||
"postgres",
|
||||
"vault",
|
||||
}
|
||||
_WORKLOAD_ALLOWED_NAMESPACES = {
|
||||
"maintenance",
|
||||
}
|
||||
_BASELINE_DELTA_WARN = 50.0
|
||||
_BASELINE_DELTA_CRIT = 100.0
|
||||
_SIGNAL_LIMIT = 15
|
||||
_PROFILE_LIMIT = 6
|
||||
_WORKLOAD_INDEX_LIMIT = 20
|
||||
_NODE_WORKLOAD_LIMIT = 12
|
||||
_NODE_WORKLOAD_TOP = 3
|
||||
_EVENTS_SUMMARY_LIMIT = 5
|
||||
_PVC_CRITICAL_THRESHOLD = 90.0
|
||||
_CAPACITY_KEYS = {
|
||||
"cpu",
|
||||
"memory",
|
||||
"pods",
|
||||
"ephemeral-storage",
|
||||
}
|
||||
_PRESSURE_TYPES = {
|
||||
"MemoryPressure",
|
||||
"DiskPressure",
|
||||
"PIDPressure",
|
||||
"NetworkUnavailable",
|
||||
}
|
||||
_EVENTS_MAX = 20
|
||||
_EVENT_WARNING = "Warning"
|
||||
_PHASE_SEVERITY = {
|
||||
"Failed": 3,
|
||||
"Pending": 2,
|
||||
"Unknown": 1,
|
||||
}
|
||||
_PENDING_15M_HOURS = 0.25
|
||||
_LOAD_TOP_COUNT = 5
|
||||
_NAMESPACE_TOP_COUNT = 5
|
||||
_PVC_PRESSURE_THRESHOLD = 80.0
|
||||
_ALERT_TOP_LIMIT = 10
|
||||
_POD_REASON_LIMIT = 10
|
||||
_POD_REASON_TREND_LIMIT = 10
|
||||
_NAMESPACE_ISSUE_LIMIT = 8
|
||||
_CROSS_NODE_TOP = 3
|
||||
_CROSS_NAMESPACE_TOP = 3
|
||||
_CROSS_PVC_TOP = 3
|
||||
_POD_TERMINATED_REASONS = {
|
||||
"oom_killed": "OOMKilled",
|
||||
"error": "Error",
|
||||
}
|
||||
_POD_WAITING_REASONS = {
|
||||
"crash_loop": "CrashLoopBackOff",
|
||||
"image_pull_backoff": "ImagePullBackOff",
|
||||
"err_image_pull": "ErrImagePull",
|
||||
"create_config_error": "CreateContainerConfigError",
|
||||
}
|
||||
_DELTA_TOP_LIMIT = 6
|
||||
_REASON_TOP_LIMIT = 5
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ClusterStateSummary:
|
||||
nodes_total: int | None
|
||||
nodes_ready: int | None
|
||||
pods_running: int | None
|
||||
kustomizations_not_ready: int | None
|
||||
errors: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SignalContext:
|
||||
metrics: dict[str, Any]
|
||||
node_context: list[dict[str, Any]]
|
||||
namespace_context: list[dict[str, Any]]
|
||||
workloads_health: dict[str, Any]
|
||||
pod_issues: dict[str, Any]
|
||||
kustomizations: dict[str, Any]
|
||||
|
||||
|
||||
def _items(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||
return [item for item in items if isinstance(item, dict)]
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
105
ariadne/services/cluster_state_fetchers.py
Normal file
105
ariadne/services/cluster_state_fetchers.py
Normal file
@ -0,0 +1,105 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from ..k8s.client import get_json as _default_get_json
|
||||
from .cluster_state_flux_events import *
|
||||
from .cluster_state_nodes import *
|
||||
from .cluster_state_pods import *
|
||||
from .cluster_state_workloads import *
|
||||
|
||||
|
||||
PodFetchResult = tuple[list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], list[dict[str, Any]], dict[str, Any]]
|
||||
|
||||
|
||||
def _get_json(path: str) -> dict[str, Any]:
|
||||
facade = sys.modules.get("ariadne.services.cluster_state")
|
||||
getter = getattr(facade, "get_json", _default_get_json) if facade is not None else _default_get_json
|
||||
return getter(path)
|
||||
|
||||
def _fetch_nodes(errors: list[str]) -> tuple[dict[str, Any], list[dict[str, Any]], dict[str, Any]]:
|
||||
nodes: dict[str, Any] = {}
|
||||
details: list[dict[str, Any]] = []
|
||||
summary: dict[str, Any] = {}
|
||||
try:
|
||||
payload = _get_json("/api/v1/nodes")
|
||||
nodes = _summarize_nodes(payload)
|
||||
details = _node_details(payload)
|
||||
summary = _summarize_inventory(details)
|
||||
except Exception as exc:
|
||||
errors.append(f"nodes: {exc}")
|
||||
return nodes, details, summary
|
||||
|
||||
|
||||
def _fetch_flux(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
payload = _get_json(
|
||||
"/apis/kustomize.toolkit.fluxcd.io/v1/namespaces/flux-system/kustomizations"
|
||||
)
|
||||
return _summarize_kustomizations(payload)
|
||||
except Exception as exc:
|
||||
errors.append(f"flux: {exc}")
|
||||
return {}
|
||||
|
||||
|
||||
def _fetch_pods(errors: list[str]) -> PodFetchResult:
|
||||
workloads: list[dict[str, Any]] = []
|
||||
namespace_pods: list[dict[str, Any]] = []
|
||||
namespace_nodes: list[dict[str, Any]] = []
|
||||
node_pods: list[dict[str, Any]] = []
|
||||
pod_issues: dict[str, Any] = {}
|
||||
try:
|
||||
pods_payload = _get_json("/api/v1/pods?limit=5000")
|
||||
workloads = _summarize_workloads(pods_payload)
|
||||
namespace_pods = _summarize_namespace_pods(pods_payload)
|
||||
namespace_nodes = _summarize_namespace_nodes(pods_payload)
|
||||
node_pods = _summarize_node_pods(pods_payload)
|
||||
pod_issues = _summarize_pod_issues(pods_payload)
|
||||
except Exception as exc:
|
||||
errors.append(f"pods: {exc}")
|
||||
return workloads, namespace_pods, namespace_nodes, node_pods, pod_issues
|
||||
|
||||
|
||||
def _fetch_jobs(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
jobs_payload = _get_json("/apis/batch/v1/jobs?limit=2000")
|
||||
return _summarize_jobs(jobs_payload)
|
||||
except Exception as exc:
|
||||
errors.append(f"jobs: {exc}")
|
||||
return {}
|
||||
|
||||
def _fetch_longhorn(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
payload = _get_json(
|
||||
"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes"
|
||||
)
|
||||
return _summarize_longhorn_volumes(payload)
|
||||
except Exception as exc:
|
||||
errors.append(f"longhorn: {exc}")
|
||||
return {}
|
||||
|
||||
|
||||
def _fetch_workload_health(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
deployments_payload = _get_json("/apis/apps/v1/deployments?limit=2000")
|
||||
statefulsets_payload = _get_json("/apis/apps/v1/statefulsets?limit=2000")
|
||||
daemonsets_payload = _get_json("/apis/apps/v1/daemonsets?limit=2000")
|
||||
deployments = _summarize_deployments(deployments_payload)
|
||||
statefulsets = _summarize_statefulsets(statefulsets_payload)
|
||||
daemonsets = _summarize_daemonsets(daemonsets_payload)
|
||||
return _summarize_workload_health(deployments, statefulsets, daemonsets)
|
||||
except Exception as exc:
|
||||
errors.append(f"workloads_health: {exc}")
|
||||
return {}
|
||||
|
||||
|
||||
def _fetch_events(errors: list[str]) -> dict[str, Any]:
|
||||
try:
|
||||
events_payload = _get_json("/api/v1/events?limit=2000")
|
||||
return _summarize_events(events_payload)
|
||||
except Exception as exc:
|
||||
errors.append(f"events: {exc}")
|
||||
return {}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
117
ariadne/services/cluster_state_flux_events.py
Normal file
117
ariadne/services/cluster_state_flux_events.py
Normal file
@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_nodes import *
|
||||
|
||||
def _summarize_kustomizations(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
not_ready: list[dict[str, Any]] = []
|
||||
for item in _items(payload):
|
||||
metadata = item.get("metadata") if isinstance(item.get("metadata"), dict) else {}
|
||||
spec = item.get("spec") if isinstance(item.get("spec"), dict) else {}
|
||||
status = item.get("status") if isinstance(item.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
conditions = status.get("conditions")
|
||||
ready, reason, message = _condition_status(conditions, "Ready")
|
||||
suspended = bool(spec.get("suspend"))
|
||||
if ready is True and not suspended:
|
||||
continue
|
||||
not_ready.append(
|
||||
{
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
"ready": ready,
|
||||
"suspended": suspended,
|
||||
"reason": reason,
|
||||
"message": message,
|
||||
}
|
||||
)
|
||||
not_ready.sort(key=lambda item: (item.get("namespace") or "", item.get("name") or ""))
|
||||
return {
|
||||
"total": len(_items(payload)),
|
||||
"not_ready": len(not_ready),
|
||||
"items": not_ready,
|
||||
}
|
||||
|
||||
|
||||
def _namespace_allowed(namespace: str) -> bool:
|
||||
if not namespace:
|
||||
return False
|
||||
if namespace in _WORKLOAD_ALLOWED_NAMESPACES:
|
||||
return True
|
||||
return namespace not in _SYSTEM_NAMESPACES
|
||||
|
||||
|
||||
def _event_timestamp(event: dict[str, Any]) -> str:
|
||||
for key in ("eventTime", "lastTimestamp", "firstTimestamp"):
|
||||
value = event.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value
|
||||
return ""
|
||||
|
||||
|
||||
def _event_sort_key(timestamp: str) -> float:
|
||||
if not timestamp:
|
||||
return 0.0
|
||||
try:
|
||||
return datetime.fromisoformat(timestamp.replace("Z", "+00:00")).timestamp()
|
||||
except ValueError:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _summarize_events(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
warnings: list[dict[str, Any]] = []
|
||||
by_reason: dict[str, int] = {}
|
||||
by_namespace: dict[str, int] = {}
|
||||
for event in _items(payload):
|
||||
metadata = event.get("metadata") if isinstance(event.get("metadata"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
if not _namespace_allowed(namespace):
|
||||
continue
|
||||
event_type = event.get("type") if isinstance(event.get("type"), str) else ""
|
||||
if event_type != _EVENT_WARNING:
|
||||
continue
|
||||
reason = event.get("reason") if isinstance(event.get("reason"), str) else ""
|
||||
message = event.get("message") if isinstance(event.get("message"), str) else ""
|
||||
count = event.get("count") if isinstance(event.get("count"), int) else 1
|
||||
involved = (
|
||||
event.get("involvedObject") if isinstance(event.get("involvedObject"), dict) else {}
|
||||
)
|
||||
timestamp = _event_timestamp(event)
|
||||
warnings.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"reason": reason,
|
||||
"message": message,
|
||||
"count": count,
|
||||
"last_seen": timestamp,
|
||||
"object_kind": involved.get("kind") or "",
|
||||
"object_name": involved.get("name") or "",
|
||||
}
|
||||
)
|
||||
if reason:
|
||||
by_reason[reason] = by_reason.get(reason, 0) + count
|
||||
if namespace:
|
||||
by_namespace[namespace] = by_namespace.get(namespace, 0) + count
|
||||
warnings.sort(key=lambda item: _event_sort_key(item.get("last_seen") or ""), reverse=True)
|
||||
top = warnings[:_EVENTS_MAX]
|
||||
top_reason = ""
|
||||
top_reason_count = 0
|
||||
if by_reason:
|
||||
top_reason, top_reason_count = sorted(
|
||||
by_reason.items(), key=lambda item: (-item[1], item[0])
|
||||
)[0]
|
||||
latest_warning = top[0] if top else None
|
||||
return {
|
||||
"warnings_total": len(warnings),
|
||||
"warnings_by_reason": by_reason,
|
||||
"warnings_by_namespace": by_namespace,
|
||||
"warnings_recent": top,
|
||||
"warnings_top_reason": {"reason": top_reason, "count": top_reason_count},
|
||||
"warnings_latest": latest_warning,
|
||||
}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
75
ariadne/services/cluster_state_health.py
Normal file
75
ariadne/services/cluster_state_health.py
Normal file
@ -0,0 +1,75 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
|
||||
HealthRows = list[dict[str, Any]]
|
||||
|
||||
|
||||
def _health_bullets(metrics: dict[str, Any], nodes_summary: dict[str, Any], workloads_health: dict[str, Any], anomalies: HealthRows) -> list[str]:
|
||||
bullets: list[str] = []
|
||||
nodes_total = metrics.get("nodes_total")
|
||||
nodes_ready = metrics.get("nodes_ready")
|
||||
if nodes_total is not None and nodes_ready is not None:
|
||||
bullets.append(f"Nodes ready: {int(nodes_ready)}/{int(nodes_total)}")
|
||||
pods_running = metrics.get("pods_running") or 0
|
||||
pods_pending = metrics.get("pods_pending") or 0
|
||||
pods_failed = metrics.get("pods_failed") or 0
|
||||
bullets.append(f"Pods: {int(pods_running)} running, {int(pods_pending)} pending, {int(pods_failed)} failed")
|
||||
not_ready = 0
|
||||
for key in ("deployments", "statefulsets", "daemonsets"):
|
||||
entry = workloads_health.get(key) if isinstance(workloads_health.get(key), dict) else {}
|
||||
not_ready += int(entry.get("not_ready") or 0)
|
||||
if not_ready:
|
||||
bullets.append(f"Workloads not ready: {not_ready}")
|
||||
else:
|
||||
bullets.append("Workloads: all ready")
|
||||
if anomalies:
|
||||
top = anomalies[0].get("summary") if isinstance(anomalies[0], dict) else None
|
||||
if isinstance(top, str) and top:
|
||||
bullets.append(f"Top concern: {top}")
|
||||
return bullets[:4]
|
||||
|
||||
|
||||
def _workload_not_ready_items(workloads_health: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for key in ("deployments", "statefulsets", "daemonsets"):
|
||||
entry = workloads_health.get(key) if isinstance(workloads_health.get(key), dict) else {}
|
||||
for item in entry.get("items") or []:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"kind": key[:-1],
|
||||
"namespace": item.get("namespace") or "",
|
||||
"name": item.get("name") or "",
|
||||
"desired": item.get("desired"),
|
||||
"ready": item.get("ready"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (item.get("namespace") or "", item.get("name") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _pod_restarts_top(metrics: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in metrics.get("top_restarts_1h") or []:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
pod = metric.get("pod")
|
||||
if not isinstance(namespace, str) or not isinstance(pod, str):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pod": pod,
|
||||
"value": item.get("value"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("value") or 0), item.get("namespace") or ""))
|
||||
return output[:5]
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
361
ariadne/services/cluster_state_metric_collectors.py
Normal file
361
ariadne/services/cluster_state_metric_collectors.py
Normal file
@ -0,0 +1,361 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_anomalies import *
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_flux_events import *
|
||||
from .cluster_state_health import *
|
||||
from .cluster_state_vm_client import *
|
||||
from .cluster_state_vm_trends import *
|
||||
from .cluster_state_vm_usage import *
|
||||
|
||||
def _collect_vm_core(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
try:
|
||||
metrics["nodes_total"] = _vm_scalar("count(kube_node_info)")
|
||||
metrics["nodes_ready"] = _vm_scalar(
|
||||
"count(kube_node_status_condition{condition=\"Ready\",status=\"true\"})"
|
||||
)
|
||||
metrics["capacity_cpu"] = _vm_scalar("sum(kube_node_status_capacity_cpu_cores)")
|
||||
metrics["allocatable_cpu"] = _vm_scalar("sum(kube_node_status_allocatable_cpu_cores)")
|
||||
metrics["capacity_mem_bytes"] = _vm_scalar("sum(kube_node_status_capacity_memory_bytes)")
|
||||
metrics["allocatable_mem_bytes"] = _vm_scalar("sum(kube_node_status_allocatable_memory_bytes)")
|
||||
metrics["capacity_pods"] = _vm_scalar("sum(kube_node_status_capacity_pods)")
|
||||
metrics["allocatable_pods"] = _vm_scalar("sum(kube_node_status_allocatable_pods)")
|
||||
metrics["pods_running"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Running\"})")
|
||||
metrics["pods_pending"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Pending\"})")
|
||||
metrics["pods_failed"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Failed\"})")
|
||||
metrics["pods_succeeded"] = _vm_scalar("sum(kube_pod_status_phase{phase=\"Succeeded\"})")
|
||||
metrics["top_restarts_1h"] = _vm_vector(
|
||||
f"topk(5, sum by (namespace,pod) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
||||
)
|
||||
metrics["restart_namespace_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f"topk(5, sum by (namespace) (increase(kube_pod_container_status_restarts_total[{_RESTARTS_WINDOW}])))"
|
||||
)
|
||||
)
|
||||
metrics["pod_cpu_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f'topk(5, sum by (namespace,pod) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
||||
)
|
||||
)
|
||||
metrics["pod_cpu_top_node"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f'topk(5, sum by (node,namespace,pod) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}]) * on (namespace,pod) group_left(node) kube_pod_info))'
|
||||
)
|
||||
)
|
||||
metrics["pod_mem_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (namespace,pod) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||
)
|
||||
)
|
||||
metrics["pod_mem_top_node"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (node,namespace,pod) (container_memory_working_set_bytes{namespace!=\"\"} * on (namespace,pod) group_left(node) kube_pod_info))"
|
||||
)
|
||||
)
|
||||
metrics["job_failures_24h"] = _vm_vector(
|
||||
"topk(5, sum by (namespace,job_name) (increase(kube_job_status_failed[24h])))"
|
||||
)
|
||||
except Exception as exc:
|
||||
errors.append(f"vm: {exc}")
|
||||
|
||||
|
||||
def _collect_node_metrics(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
metrics["postgres_connections"] = _postgres_connections(errors)
|
||||
metrics["hottest_nodes"] = _hottest_nodes(errors)
|
||||
metrics["node_usage"] = _node_usage(errors)
|
||||
metrics["node_usage_stats"] = {
|
||||
"cpu": _usage_stats(metrics.get("node_usage", {}).get("cpu", [])),
|
||||
"ram": _usage_stats(metrics.get("node_usage", {}).get("ram", [])),
|
||||
"net": _usage_stats(metrics.get("node_usage", {}).get("net", [])),
|
||||
"io": _usage_stats(metrics.get("node_usage", {}).get("io", [])),
|
||||
"disk": _usage_stats(metrics.get("node_usage", {}).get("disk", [])),
|
||||
}
|
||||
try:
|
||||
node_exprs = _node_usage_exprs()
|
||||
node_baseline_map: dict[str, dict[str, dict[str, float]]] = {}
|
||||
for key, expr in node_exprs.items():
|
||||
baseline = _vm_baseline_map(expr, "node", _BASELINE_WINDOW)
|
||||
metrics.setdefault("node_baseline", {})[key] = _baseline_map_to_list(baseline, "node")
|
||||
for name, stats in baseline.items():
|
||||
node_baseline_map.setdefault(name, {})[key] = stats
|
||||
metrics["node_baseline_map"] = node_baseline_map
|
||||
except Exception as exc:
|
||||
errors.append(f"baseline: {exc}")
|
||||
|
||||
|
||||
def _collect_trend_metrics(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
try:
|
||||
metrics["node_trends"] = _build_metric_trends(
|
||||
_node_usage_exprs(),
|
||||
"node",
|
||||
"node",
|
||||
_TREND_WINDOWS,
|
||||
_TREND_NODE_LIMIT,
|
||||
)
|
||||
metrics["namespace_trends"] = _build_metric_trends(
|
||||
_namespace_usage_exprs(),
|
||||
"namespace",
|
||||
"namespace",
|
||||
_TREND_WINDOWS,
|
||||
_TREND_NAMESPACE_LIMIT,
|
||||
)
|
||||
metrics["namespace_request_trends"] = _build_metric_trends(
|
||||
_namespace_request_exprs(),
|
||||
"namespace",
|
||||
"namespace",
|
||||
_TREND_WINDOWS,
|
||||
_TREND_NAMESPACE_LIMIT,
|
||||
)
|
||||
metrics["restart_trends"] = {
|
||||
window: _restart_namespace_trend(window) for window in _TREND_WINDOWS
|
||||
}
|
||||
metrics["job_failure_trends"] = {
|
||||
window: _job_failure_trend(window) for window in _TREND_WINDOWS
|
||||
}
|
||||
metrics["pods_phase_trends"] = _pods_phase_trends()
|
||||
metrics["pvc_usage_trends"] = _pvc_usage_trends()
|
||||
metrics["pod_waiting_now"] = _pod_waiting_now()
|
||||
metrics["pod_waiting_trends"] = _pod_waiting_trends()
|
||||
metrics["pod_terminated_now"] = _pod_terminated_now()
|
||||
metrics["pod_terminated_trends"] = _pod_terminated_trends()
|
||||
metrics["cluster_trends"] = _cluster_trends()
|
||||
metrics["node_condition_trends"] = _node_condition_trends()
|
||||
metrics["pod_reason_totals"] = {
|
||||
"waiting": _pod_reason_totals(
|
||||
_POD_WAITING_REASONS,
|
||||
"kube_pod_container_status_waiting_reason",
|
||||
),
|
||||
"terminated": _pod_reason_totals(
|
||||
_POD_TERMINATED_REASONS,
|
||||
"kube_pod_container_status_terminated_reason",
|
||||
),
|
||||
}
|
||||
except Exception as exc:
|
||||
errors.append(f"trends: {exc}")
|
||||
|
||||
|
||||
def _collect_issue_metrics(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
try:
|
||||
waiting_series = "kube_pod_container_status_waiting_reason"
|
||||
terminated_series = "kube_pod_container_status_terminated_reason"
|
||||
metrics["namespace_issue_top"] = {
|
||||
"crash_loop": _namespace_reason_entries(
|
||||
f'{waiting_series}{{reason="CrashLoopBackOff"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
"image_pull": _namespace_reason_entries(
|
||||
f'{waiting_series}{{reason="ImagePullBackOff"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
"err_image_pull": _namespace_reason_entries(
|
||||
f'{waiting_series}{{reason="ErrImagePull"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
"config_error": _namespace_reason_entries(
|
||||
f'{waiting_series}{{reason="CreateContainerConfigError"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
"oom_killed": _namespace_reason_entries(
|
||||
f'{terminated_series}{{reason="OOMKilled"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
"terminated_error": _namespace_reason_entries(
|
||||
f'{terminated_series}{{reason="Error"}}',
|
||||
_NAMESPACE_ISSUE_LIMIT,
|
||||
),
|
||||
}
|
||||
except Exception as exc:
|
||||
errors.append(f"issues: {exc}")
|
||||
|
||||
|
||||
def _collect_alert_metrics(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
try:
|
||||
vm_now = _vm_alerts_now()
|
||||
vm_trends = {window: _vm_alerts_trend(window) for window in _TREND_WINDOWS}
|
||||
alertmanager_alerts = _alertmanager_alerts(errors)
|
||||
metrics["alerts"] = {
|
||||
"vm": {
|
||||
"active": vm_now,
|
||||
"active_total": len(vm_now),
|
||||
},
|
||||
"alertmanager": _summarize_alerts(alertmanager_alerts) if alertmanager_alerts else {},
|
||||
"trends": vm_trends,
|
||||
}
|
||||
except Exception as exc:
|
||||
errors.append(f"alerts: {exc}")
|
||||
|
||||
|
||||
def _collect_namespace_metrics(metrics: dict[str, Any], errors: list[str]) -> None:
|
||||
try:
|
||||
metrics["namespace_cpu_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f'topk(5, sum by (namespace) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}])))'
|
||||
)
|
||||
)
|
||||
metrics["namespace_mem_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (namespace) (container_memory_working_set_bytes{namespace!=\"\"}))"
|
||||
)
|
||||
)
|
||||
metrics["namespace_cpu_requests_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (namespace) (kube_pod_container_resource_requests_cpu_cores))"
|
||||
)
|
||||
)
|
||||
metrics["namespace_mem_requests_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
"topk(5, sum by (namespace) (kube_pod_container_resource_requests_memory_bytes))"
|
||||
)
|
||||
)
|
||||
metrics["namespace_net_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f"topk(5, sum by (namespace) (rate(container_network_receive_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}]) + rate(container_network_transmit_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}])))"
|
||||
)
|
||||
)
|
||||
metrics["namespace_io_top"] = _filter_namespace_vector(
|
||||
_vm_vector(
|
||||
f"topk(5, sum by (namespace) (rate(container_fs_reads_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}]) + rate(container_fs_writes_bytes_total{{namespace!=\"\"}}[{_RATE_WINDOW}])))"
|
||||
)
|
||||
)
|
||||
namespace_cpu_usage = _vm_namespace_totals(
|
||||
f'sum by (namespace) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}]))'
|
||||
)
|
||||
namespace_cpu_requests = _vm_namespace_totals(
|
||||
"sum by (namespace) (kube_pod_container_resource_requests_cpu_cores)"
|
||||
)
|
||||
namespace_mem_usage = _vm_namespace_totals(
|
||||
'sum by (namespace) (container_memory_working_set_bytes{namespace!=""})'
|
||||
)
|
||||
namespace_mem_requests = _vm_namespace_totals(
|
||||
"sum by (namespace) (kube_pod_container_resource_requests_memory_bytes)"
|
||||
)
|
||||
metrics["namespace_capacity"] = _build_namespace_capacity(
|
||||
namespace_cpu_usage,
|
||||
namespace_cpu_requests,
|
||||
namespace_mem_usage,
|
||||
namespace_mem_requests,
|
||||
)
|
||||
metrics["namespace_totals"] = {
|
||||
"cpu": _namespace_totals_list(namespace_cpu_usage),
|
||||
"mem": _namespace_totals_list(namespace_mem_usage),
|
||||
"cpu_requests": _namespace_totals_list(namespace_cpu_requests),
|
||||
"mem_requests": _namespace_totals_list(namespace_mem_requests),
|
||||
}
|
||||
except Exception as exc:
|
||||
errors.append(f"namespace_usage: {exc}")
|
||||
try:
|
||||
namespace_exprs = _namespace_usage_exprs()
|
||||
namespace_baseline_map: dict[str, dict[str, dict[str, float]]] = {}
|
||||
for key, expr in namespace_exprs.items():
|
||||
baseline = _vm_baseline_map(expr, "namespace", _BASELINE_WINDOW)
|
||||
metrics.setdefault("namespace_baseline", {})[key] = _baseline_map_to_list(baseline, "namespace")
|
||||
for name, stats in baseline.items():
|
||||
namespace_baseline_map.setdefault(name, {})[key] = stats
|
||||
metrics["namespace_baseline_map"] = namespace_baseline_map
|
||||
except Exception as exc:
|
||||
errors.append(f"baseline: {exc}")
|
||||
metrics["namespace_capacity_summary"] = _namespace_capacity_summary(
|
||||
metrics.get("namespace_capacity", []),
|
||||
)
|
||||
|
||||
|
||||
def _finalize_metrics(metrics: dict[str, Any]) -> None:
|
||||
metrics["units"] = {
|
||||
"cpu": "percent",
|
||||
"ram": "percent",
|
||||
"net": "bytes_per_sec",
|
||||
"io": "bytes_per_sec",
|
||||
"disk": "percent",
|
||||
"restarts": "count",
|
||||
"pod_cpu": "cores",
|
||||
"pod_mem": "bytes",
|
||||
"pod_cpu_top_node": "cores",
|
||||
"pod_mem_top_node": "bytes",
|
||||
"job_failures_24h": "count",
|
||||
"namespace_cpu": "cores",
|
||||
"namespace_mem": "bytes",
|
||||
"namespace_cpu_requests": "cores",
|
||||
"namespace_mem_requests": "bytes",
|
||||
"namespace_net": "bytes_per_sec",
|
||||
"namespace_io": "bytes_per_sec",
|
||||
"pvc_used_percent": "percent",
|
||||
"capacity_cpu": "cores",
|
||||
"allocatable_cpu": "cores",
|
||||
"capacity_mem_bytes": "bytes",
|
||||
"allocatable_mem_bytes": "bytes",
|
||||
"capacity_pods": "count",
|
||||
"allocatable_pods": "count",
|
||||
}
|
||||
metrics["windows"] = {
|
||||
"rates": _RATE_WINDOW,
|
||||
"restarts": _RESTARTS_WINDOW,
|
||||
"trend": _TREND_WINDOWS,
|
||||
}
|
||||
|
||||
|
||||
def _summarize_metrics(errors: list[str]) -> dict[str, Any]:
|
||||
metrics: dict[str, Any] = {}
|
||||
_collect_vm_core(metrics, errors)
|
||||
_collect_node_metrics(metrics, errors)
|
||||
_collect_trend_metrics(metrics, errors)
|
||||
_collect_alert_metrics(metrics, errors)
|
||||
_collect_namespace_metrics(metrics, errors)
|
||||
_collect_issue_metrics(metrics, errors)
|
||||
metrics["pvc_usage_top"] = _pvc_usage(errors)
|
||||
metrics["trend_summary"] = _trend_summary(metrics)
|
||||
_finalize_metrics(metrics)
|
||||
return metrics
|
||||
|
||||
|
||||
def _trend_summary(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
node_trends = metrics.get("node_trends", {}) if isinstance(metrics.get("node_trends"), dict) else {}
|
||||
namespace_trends = (
|
||||
metrics.get("namespace_trends", {}) if isinstance(metrics.get("namespace_trends"), dict) else {}
|
||||
)
|
||||
restarts = metrics.get("restart_trends", {}) if isinstance(metrics.get("restart_trends"), dict) else {}
|
||||
job_failures = (
|
||||
metrics.get("job_failure_trends", {}) if isinstance(metrics.get("job_failure_trends"), dict) else {}
|
||||
)
|
||||
summary: dict[str, Any] = {}
|
||||
for metric_key, target in (("cpu", "node_cpu"), ("ram", "node_ram")):
|
||||
metric_block = node_trends.get(metric_key, {}) if isinstance(node_trends.get(metric_key), dict) else {}
|
||||
summary[target] = {
|
||||
window: _limit_entries((metric_block.get(window) or {}).get("avg", []), 5)
|
||||
for window in _TREND_WINDOWS
|
||||
}
|
||||
for metric_key, target in (("cpu", "namespace_cpu"), ("mem", "namespace_mem")):
|
||||
metric_block = namespace_trends.get(metric_key, {}) if isinstance(namespace_trends.get(metric_key), dict) else {}
|
||||
summary[target] = {
|
||||
window: _limit_entries((metric_block.get(window) or {}).get("avg", []), 5)
|
||||
for window in _TREND_WINDOWS
|
||||
}
|
||||
summary["restarts"] = {window: _limit_entries(entries or [], 5) for window, entries in restarts.items()}
|
||||
summary["job_failures"] = {
|
||||
window: _limit_entries(entries or [], 5) for window, entries in job_failures.items()
|
||||
}
|
||||
return summary
|
||||
|
||||
|
||||
def _build_offenders(metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
offenders: dict[str, Any] = {}
|
||||
offenders["pod_restarts_1h"] = _pod_restarts_top(metrics)
|
||||
offenders["pod_waiting_now"] = metrics.get("pod_waiting_now") or {}
|
||||
offenders["pod_terminated_now"] = metrics.get("pod_terminated_now") or {}
|
||||
offenders["job_failures_24h"] = metrics.get("job_failures_24h") or []
|
||||
offenders["pvc_pressure"] = _pvc_pressure_entries(metrics)
|
||||
offenders["namespace_issues"] = metrics.get("namespace_issue_top") or {}
|
||||
return offenders
|
||||
|
||||
|
||||
def _namespace_totals_list(totals: dict[str, float]) -> list[dict[str, Any]]:
|
||||
entries = [
|
||||
{"namespace": name, "value": value}
|
||||
for name, value in totals.items()
|
||||
if isinstance(name, str) and name
|
||||
]
|
||||
entries.sort(key=lambda item: (-(item.get("value") or 0), item.get("namespace") or ""))
|
||||
return entries
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
401
ariadne/services/cluster_state_nodes.py
Normal file
401
ariadne/services/cluster_state_nodes.py
Normal file
@ -0,0 +1,401 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
|
||||
def _node_usage_by_hardware(node_load: list[dict[str, Any]], node_details: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
if not node_load or not node_details:
|
||||
return []
|
||||
hardware_by_node = _hardware_map(node_details)
|
||||
buckets: dict[str, dict[str, list[float]]] = {}
|
||||
for entry in node_load:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
if not isinstance(node, str) or not node:
|
||||
continue
|
||||
hardware = hardware_by_node.get(node, "unknown")
|
||||
_append_hardware_usage(buckets, str(hardware), entry)
|
||||
return _finalize_hardware_usage(buckets)
|
||||
|
||||
|
||||
def _hardware_map(node_details: list[dict[str, Any]]) -> dict[str, str]:
|
||||
mapping: dict[str, str] = {}
|
||||
for node in node_details:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
mapping[name] = str(node.get("hardware") or "unknown")
|
||||
return mapping
|
||||
|
||||
|
||||
def _append_hardware_usage(buckets: dict[str, dict[str, list[float]]], hardware: str, entry: dict[str, Any]) -> None:
|
||||
bucket = buckets.setdefault(hardware, {"load_index": [], "cpu": [], "ram": [], "net": [], "io": []})
|
||||
for key in ("load_index", "cpu", "ram", "net", "io"):
|
||||
value = entry.get(key)
|
||||
if isinstance(value, (int, float)):
|
||||
bucket[key].append(float(value))
|
||||
|
||||
|
||||
def _finalize_hardware_usage(buckets: dict[str, dict[str, list[float]]]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for hardware, metrics in buckets.items():
|
||||
row: dict[str, Any] = {"hardware": hardware}
|
||||
for key, values in metrics.items():
|
||||
if values:
|
||||
row[key] = sum(values) / len(values)
|
||||
output.append(row)
|
||||
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("hardware") or ""))
|
||||
return output
|
||||
|
||||
def _node_ready(conditions: Any) -> bool:
|
||||
if not isinstance(conditions, list):
|
||||
return False
|
||||
for condition in conditions:
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
if condition.get("type") == "Ready":
|
||||
return condition.get("status") == "True"
|
||||
return False
|
||||
|
||||
|
||||
def _summarize_nodes(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
names: list[str] = []
|
||||
not_ready: list[str] = []
|
||||
for node in _items(payload):
|
||||
metadata = node.get("metadata") if isinstance(node.get("metadata"), dict) else {}
|
||||
status = node.get("status") if isinstance(node.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
if not name:
|
||||
continue
|
||||
names.append(name)
|
||||
if not _node_ready(status.get("conditions")):
|
||||
not_ready.append(name)
|
||||
names.sort()
|
||||
not_ready.sort()
|
||||
total = len(names)
|
||||
ready = total - len(not_ready)
|
||||
return {
|
||||
"total": total,
|
||||
"ready": ready,
|
||||
"not_ready": len(not_ready),
|
||||
"names": names,
|
||||
"not_ready_names": not_ready,
|
||||
}
|
||||
|
||||
|
||||
def _node_labels(labels: dict[str, Any]) -> dict[str, Any]:
|
||||
if not isinstance(labels, dict):
|
||||
return {}
|
||||
keep: dict[str, Any] = {}
|
||||
for key, value in labels.items():
|
||||
if key.startswith("node-role.kubernetes.io/"):
|
||||
keep[key] = value
|
||||
if key in {
|
||||
"kubernetes.io/arch",
|
||||
"kubernetes.io/hostname",
|
||||
"beta.kubernetes.io/arch",
|
||||
"hardware",
|
||||
"jetson",
|
||||
}:
|
||||
keep[key] = value
|
||||
return keep
|
||||
|
||||
|
||||
def _node_addresses(status: dict[str, Any]) -> dict[str, str]:
|
||||
addresses = status.get("addresses") if isinstance(status.get("addresses"), list) else []
|
||||
output: dict[str, str] = {}
|
||||
for addr in addresses:
|
||||
if not isinstance(addr, dict):
|
||||
continue
|
||||
addr_type = addr.get("type")
|
||||
addr_value = addr.get("address")
|
||||
if isinstance(addr_type, str) and isinstance(addr_value, str):
|
||||
output[addr_type] = addr_value
|
||||
return output
|
||||
|
||||
|
||||
def _node_details(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
details: list[dict[str, Any]] = []
|
||||
for node in _items(payload):
|
||||
metadata = node.get("metadata") if isinstance(node.get("metadata"), dict) else {}
|
||||
spec = node.get("spec") if isinstance(node.get("spec"), dict) else {}
|
||||
status = node.get("status") if isinstance(node.get("status"), dict) else {}
|
||||
node_info = status.get("nodeInfo") if isinstance(status.get("nodeInfo"), dict) else {}
|
||||
labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
if not name:
|
||||
continue
|
||||
roles = _node_roles(labels)
|
||||
conditions = _node_pressure_conditions(status.get("conditions"))
|
||||
created_at = metadata.get("creationTimestamp") if isinstance(metadata.get("creationTimestamp"), str) else ""
|
||||
taints = _node_taints(spec.get("taints"))
|
||||
details.append(
|
||||
{
|
||||
"name": name,
|
||||
"ready": _node_ready(status.get("conditions")),
|
||||
"roles": roles,
|
||||
"is_worker": _node_is_worker(labels),
|
||||
"labels": _node_labels(labels),
|
||||
"hardware": _hardware_hint(labels, node_info),
|
||||
"arch": node_info.get("architecture") or "",
|
||||
"os": node_info.get("operatingSystem") or "",
|
||||
"kernel": node_info.get("kernelVersion") or "",
|
||||
"kubelet": node_info.get("kubeletVersion") or "",
|
||||
"container_runtime": node_info.get("containerRuntimeVersion") or "",
|
||||
"addresses": _node_addresses(status),
|
||||
"created_at": created_at,
|
||||
"age_hours": _age_hours(created_at),
|
||||
"taints": taints,
|
||||
"unschedulable": bool(spec.get("unschedulable")),
|
||||
"capacity": _node_capacity(status.get("capacity")),
|
||||
"allocatable": _node_capacity(status.get("allocatable")),
|
||||
"pressure": conditions,
|
||||
}
|
||||
)
|
||||
details.sort(key=lambda item: item.get("name") or "")
|
||||
return details
|
||||
|
||||
|
||||
def _age_hours(timestamp: str) -> float | None:
|
||||
if not timestamp:
|
||||
return None
|
||||
try:
|
||||
parsed = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return None
|
||||
return round((datetime.now(timezone.utc) - parsed).total_seconds() / 3600, 1)
|
||||
|
||||
|
||||
def _node_age_stats(details: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
ages: list[tuple[str, float]] = []
|
||||
for node in details:
|
||||
name = node.get("name") if isinstance(node, dict) else ""
|
||||
age = node.get("age_hours")
|
||||
if isinstance(name, str) and name and isinstance(age, (int, float)):
|
||||
ages.append((name, float(age)))
|
||||
if not ages:
|
||||
return {}
|
||||
ages.sort(key=lambda item: item[1])
|
||||
values = [age for _, age in ages]
|
||||
return {
|
||||
"min": round(min(values), 1),
|
||||
"max": round(max(values), 1),
|
||||
"avg": round(sum(values) / len(values), 1),
|
||||
"youngest": [{"name": name, "age_hours": age} for name, age in ages[:5]],
|
||||
"oldest": [{"name": name, "age_hours": age} for name, age in ages[-5:]],
|
||||
}
|
||||
|
||||
|
||||
def _node_flagged(details: list[dict[str, Any]], key: str) -> list[str]:
|
||||
names: list[str] = []
|
||||
for node in details:
|
||||
name = node.get("name") if isinstance(node, dict) else ""
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
if node.get(key):
|
||||
names.append(name)
|
||||
names.sort()
|
||||
return names
|
||||
|
||||
|
||||
def _node_taints(raw: Any) -> list[dict[str, str]]:
|
||||
if not isinstance(raw, list):
|
||||
return []
|
||||
taints: list[dict[str, str]] = []
|
||||
for entry in raw:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
key = entry.get("key")
|
||||
effect = entry.get("effect")
|
||||
value = entry.get("value")
|
||||
if isinstance(key, str) and isinstance(effect, str):
|
||||
taints.append(
|
||||
{
|
||||
"key": key,
|
||||
"value": value if isinstance(value, str) else "",
|
||||
"effect": effect,
|
||||
}
|
||||
)
|
||||
return taints
|
||||
|
||||
|
||||
def _summarize_inventory(details: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
summary = {
|
||||
"total": 0,
|
||||
"ready": 0,
|
||||
"workers": {"total": 0, "ready": 0},
|
||||
"by_hardware": {},
|
||||
"by_arch": {},
|
||||
"by_role": {},
|
||||
"not_ready_names": [],
|
||||
"pressure_nodes": {key: [] for key in _PRESSURE_TYPES},
|
||||
"age_stats": {},
|
||||
"tainted_nodes": [],
|
||||
"unschedulable_nodes": [],
|
||||
}
|
||||
not_ready: list[str] = []
|
||||
for node in details:
|
||||
name = _apply_node_summary(summary, node)
|
||||
if name and not node.get("ready"):
|
||||
not_ready.append(name)
|
||||
not_ready.sort()
|
||||
summary["not_ready_names"] = not_ready
|
||||
for cond_type in summary["pressure_nodes"]:
|
||||
summary["pressure_nodes"][cond_type].sort()
|
||||
summary["age_stats"] = _node_age_stats(details)
|
||||
summary["tainted_nodes"] = _node_flagged(details, "taints")
|
||||
summary["unschedulable_nodes"] = _node_flagged(details, "unschedulable")
|
||||
return summary
|
||||
|
||||
|
||||
def _hardware_groups(details: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
groups: dict[str, list[str]] = {}
|
||||
for node in details:
|
||||
if not isinstance(node, dict):
|
||||
continue
|
||||
name = node.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
hardware = str(node.get("hardware") or "unknown")
|
||||
groups.setdefault(hardware, []).append(name)
|
||||
output: list[dict[str, Any]] = []
|
||||
for hardware, nodes in groups.items():
|
||||
nodes.sort()
|
||||
output.append({"hardware": hardware, "count": len(nodes), "nodes": nodes})
|
||||
output.sort(key=lambda item: (-(item.get("count") or 0), item.get("hardware") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _pressure_summary(nodes_summary: dict[str, Any]) -> dict[str, Any]:
|
||||
pressure_nodes = nodes_summary.get("pressure_nodes") if isinstance(nodes_summary, dict) else {}
|
||||
summary: dict[str, Any] = {"by_type": {}, "total": 0}
|
||||
if isinstance(pressure_nodes, dict):
|
||||
for cond, names in pressure_nodes.items():
|
||||
count = len(names) if isinstance(names, list) else 0
|
||||
summary["by_type"][cond] = count
|
||||
summary["total"] += count
|
||||
unschedulable = nodes_summary.get("unschedulable_nodes") or []
|
||||
summary["unschedulable"] = len(unschedulable) if isinstance(unschedulable, list) else 0
|
||||
return summary
|
||||
|
||||
|
||||
def _apply_node_summary(summary: dict[str, Any], node: dict[str, Any]) -> str:
|
||||
name = node.get("name") if isinstance(node, dict) else ""
|
||||
if not isinstance(name, str) or not name:
|
||||
return ""
|
||||
summary["total"] += 1
|
||||
ready = bool(node.get("ready"))
|
||||
if ready:
|
||||
summary["ready"] += 1
|
||||
if node.get("is_worker"):
|
||||
summary["workers"]["total"] += 1
|
||||
if ready:
|
||||
summary["workers"]["ready"] += 1
|
||||
hardware = node.get("hardware") or "unknown"
|
||||
arch = node.get("arch") or "unknown"
|
||||
summary["by_hardware"][hardware] = summary["by_hardware"].get(hardware, 0) + 1
|
||||
summary["by_arch"][arch] = summary["by_arch"].get(arch, 0) + 1
|
||||
for role in node.get("roles") or []:
|
||||
summary["by_role"][role] = summary["by_role"].get(role, 0) + 1
|
||||
_apply_pressure(summary, node, name)
|
||||
return name
|
||||
|
||||
|
||||
def _apply_pressure(summary: dict[str, Any], node: dict[str, Any], name: str) -> None:
|
||||
pressure = node.get("pressure") or {}
|
||||
if not isinstance(pressure, dict):
|
||||
return
|
||||
for cond_type, active in pressure.items():
|
||||
if active and cond_type in summary["pressure_nodes"]:
|
||||
summary["pressure_nodes"][cond_type].append(name)
|
||||
|
||||
|
||||
def _node_capacity(raw: Any) -> dict[str, str]:
|
||||
if not isinstance(raw, dict):
|
||||
return {}
|
||||
output: dict[str, str] = {}
|
||||
for key in _CAPACITY_KEYS:
|
||||
value = raw.get(key)
|
||||
if isinstance(value, (str, int, float)) and value != "":
|
||||
output[key] = str(value)
|
||||
return output
|
||||
|
||||
|
||||
def _node_pressure_conditions(conditions: Any) -> dict[str, bool]:
|
||||
if not isinstance(conditions, list):
|
||||
return {}
|
||||
pressure: dict[str, bool] = {}
|
||||
for condition in conditions:
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
cond_type = condition.get("type")
|
||||
if cond_type in _PRESSURE_TYPES:
|
||||
pressure[cond_type] = condition.get("status") == "True"
|
||||
return pressure
|
||||
|
||||
|
||||
def _node_roles(labels: dict[str, Any]) -> list[str]:
|
||||
roles: list[str] = []
|
||||
for key in labels.keys():
|
||||
if key.startswith("node-role.kubernetes.io/"):
|
||||
role = key.split("/", 1)[-1]
|
||||
if role:
|
||||
roles.append(role)
|
||||
return sorted(set(roles))
|
||||
|
||||
|
||||
def _node_is_worker(labels: dict[str, Any]) -> bool:
|
||||
if "node-role.kubernetes.io/control-plane" in labels:
|
||||
return False
|
||||
if "node-role.kubernetes.io/master" in labels:
|
||||
return False
|
||||
if "node-role.kubernetes.io/worker" in labels:
|
||||
return True
|
||||
return True
|
||||
|
||||
|
||||
def _hardware_hint(labels: dict[str, Any], node_info: dict[str, Any]) -> str:
|
||||
result = "unknown"
|
||||
if str(labels.get("jetson") or "").lower() == "true":
|
||||
result = "jetson"
|
||||
else:
|
||||
hardware = (labels.get("hardware") or "").strip().lower()
|
||||
if hardware:
|
||||
result = hardware
|
||||
else:
|
||||
kernel = str(node_info.get("kernelVersion") or "").lower()
|
||||
os_image = str(node_info.get("osImage") or "").lower()
|
||||
if "tegra" in kernel or "jetson" in os_image:
|
||||
result = "jetson"
|
||||
elif "raspi" in kernel or "bcm2711" in kernel:
|
||||
result = "rpi"
|
||||
else:
|
||||
arch = str(node_info.get("architecture") or "").lower()
|
||||
if arch == "amd64":
|
||||
result = "amd64"
|
||||
elif arch == "arm64":
|
||||
result = "arm64-unknown"
|
||||
return result
|
||||
|
||||
|
||||
def _condition_status(conditions: Any, cond_type: str) -> tuple[bool | None, str, str]:
|
||||
if not isinstance(conditions, list):
|
||||
return None, "", ""
|
||||
for condition in conditions:
|
||||
if not isinstance(condition, dict):
|
||||
continue
|
||||
if condition.get("type") != cond_type:
|
||||
continue
|
||||
status = condition.get("status")
|
||||
if status == "True":
|
||||
return True, condition.get("reason") or "", condition.get("message") or ""
|
||||
if status == "False":
|
||||
return False, condition.get("reason") or "", condition.get("message") or ""
|
||||
return None, condition.get("reason") or "", condition.get("message") or ""
|
||||
return None, "", ""
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
340
ariadne/services/cluster_state_pods.py
Normal file
340
ariadne/services/cluster_state_pods.py
Normal file
@ -0,0 +1,340 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_flux_events import *
|
||||
from .cluster_state_nodes import *
|
||||
|
||||
def _workload_from_labels(labels: dict[str, Any]) -> tuple[str, str]:
|
||||
for key in _WORKLOAD_LABEL_KEYS:
|
||||
value = labels.get(key)
|
||||
if isinstance(value, str) and value:
|
||||
return value, f"label:{key}"
|
||||
return "", ""
|
||||
|
||||
|
||||
def _owner_reference(metadata: dict[str, Any]) -> tuple[str, str]:
|
||||
owners = metadata.get("ownerReferences") if isinstance(metadata.get("ownerReferences"), list) else []
|
||||
for owner in owners:
|
||||
if not isinstance(owner, dict):
|
||||
continue
|
||||
name = owner.get("name")
|
||||
kind = owner.get("kind")
|
||||
if isinstance(name, str) and name:
|
||||
return name, f"owner:{kind or 'unknown'}"
|
||||
return "", ""
|
||||
|
||||
|
||||
def _pod_workload(meta: dict[str, Any]) -> tuple[str, str]:
|
||||
labels = meta.get("labels") if isinstance(meta.get("labels"), dict) else {}
|
||||
name, source = _workload_from_labels(labels)
|
||||
if name:
|
||||
return name, source
|
||||
return _owner_reference(meta)
|
||||
|
||||
|
||||
def _summarize_workloads(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
workloads: dict[tuple[str, str], dict[str, Any]] = {}
|
||||
for pod in _items(payload):
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
|
||||
status = pod.get("status") if isinstance(pod.get("status"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
if not _namespace_allowed(namespace):
|
||||
continue
|
||||
workload, source = _pod_workload(metadata)
|
||||
if not workload:
|
||||
continue
|
||||
node = spec.get("nodeName") if isinstance(spec.get("nodeName"), str) else ""
|
||||
phase = status.get("phase") if isinstance(status.get("phase"), str) else ""
|
||||
key = (namespace, workload)
|
||||
entry = workloads.setdefault(
|
||||
key,
|
||||
{
|
||||
"namespace": namespace,
|
||||
"workload": workload,
|
||||
"source": source,
|
||||
"nodes": {},
|
||||
"pods_total": 0,
|
||||
"pods_running": 0,
|
||||
},
|
||||
)
|
||||
entry["pods_total"] += 1
|
||||
if phase == "Running":
|
||||
entry["pods_running"] += 1
|
||||
if node:
|
||||
nodes = entry["nodes"]
|
||||
nodes[node] = nodes.get(node, 0) + 1
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in workloads.values():
|
||||
nodes = entry.get("nodes") or {}
|
||||
primary = ""
|
||||
if isinstance(nodes, dict) and nodes:
|
||||
primary = sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[0][0]
|
||||
entry["primary_node"] = primary
|
||||
output.append(entry)
|
||||
output.sort(key=lambda item: (item.get("namespace") or "", item.get("workload") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _summarize_namespace_pods(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
namespaces: dict[str, dict[str, Any]] = {}
|
||||
for pod in _items(payload):
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
status = pod.get("status") if isinstance(pod.get("status"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
if not _namespace_allowed(namespace):
|
||||
continue
|
||||
phase = status.get("phase") if isinstance(status.get("phase"), str) else ""
|
||||
entry = namespaces.setdefault(
|
||||
namespace,
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pods_total": 0,
|
||||
"pods_running": 0,
|
||||
"pods_pending": 0,
|
||||
"pods_failed": 0,
|
||||
"pods_succeeded": 0,
|
||||
},
|
||||
)
|
||||
entry["pods_total"] += 1
|
||||
if phase == "Running":
|
||||
entry["pods_running"] += 1
|
||||
elif phase == "Pending":
|
||||
entry["pods_pending"] += 1
|
||||
elif phase == "Failed":
|
||||
entry["pods_failed"] += 1
|
||||
elif phase == "Succeeded":
|
||||
entry["pods_succeeded"] += 1
|
||||
output = list(namespaces.values())
|
||||
output.sort(key=lambda item: (-item.get("pods_total", 0), item.get("namespace") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _summarize_namespace_nodes(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
namespaces: dict[str, dict[str, Any]] = {}
|
||||
for pod in _items(payload):
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
|
||||
status = pod.get("status") if isinstance(pod.get("status"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
if not _namespace_allowed(namespace):
|
||||
continue
|
||||
node = spec.get("nodeName") if isinstance(spec.get("nodeName"), str) else ""
|
||||
if not node:
|
||||
continue
|
||||
phase = status.get("phase") if isinstance(status.get("phase"), str) else ""
|
||||
entry = namespaces.setdefault(
|
||||
namespace,
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pods_total": 0,
|
||||
"pods_running": 0,
|
||||
"nodes": {},
|
||||
},
|
||||
)
|
||||
entry["pods_total"] += 1
|
||||
if phase == "Running":
|
||||
entry["pods_running"] += 1
|
||||
nodes = entry["nodes"]
|
||||
nodes[node] = nodes.get(node, 0) + 1
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in namespaces.values():
|
||||
nodes = entry.get("nodes") or {}
|
||||
primary = ""
|
||||
if isinstance(nodes, dict) and nodes:
|
||||
primary = sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[0][0]
|
||||
entry["primary_node"] = primary
|
||||
output.append(entry)
|
||||
output.sort(key=lambda item: (-item.get("pods_total", 0), item.get("namespace") or ""))
|
||||
return output
|
||||
|
||||
|
||||
_NODE_PHASE_KEYS = {
|
||||
"Running": "pods_running",
|
||||
"Pending": "pods_pending",
|
||||
"Failed": "pods_failed",
|
||||
"Succeeded": "pods_succeeded",
|
||||
}
|
||||
|
||||
|
||||
def _summarize_node_pods(payload: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
nodes: dict[str, dict[str, Any]] = {}
|
||||
for pod in _items(payload):
|
||||
context = _node_pod_context(pod)
|
||||
if not context:
|
||||
continue
|
||||
node, namespace, phase = context
|
||||
entry = _node_pod_entry(nodes, node)
|
||||
_node_pod_apply(entry, namespace, phase)
|
||||
return _node_pod_finalize(nodes)
|
||||
|
||||
|
||||
def _node_pod_context(pod: dict[str, Any]) -> tuple[str, str, str] | None:
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
if not _namespace_allowed(namespace):
|
||||
return None
|
||||
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
|
||||
node = spec.get("nodeName") if isinstance(spec.get("nodeName"), str) else ""
|
||||
if not node:
|
||||
return None
|
||||
status = pod.get("status") if isinstance(pod.get("status"), dict) else {}
|
||||
phase = status.get("phase") if isinstance(status.get("phase"), str) else ""
|
||||
return node, namespace, phase
|
||||
|
||||
|
||||
def _node_pod_entry(nodes: dict[str, dict[str, Any]], node: str) -> dict[str, Any]:
|
||||
return nodes.setdefault(
|
||||
node,
|
||||
{
|
||||
"node": node,
|
||||
"pods_total": 0,
|
||||
"pods_running": 0,
|
||||
"pods_pending": 0,
|
||||
"pods_failed": 0,
|
||||
"pods_succeeded": 0,
|
||||
"namespaces": {},
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _node_pod_apply(entry: dict[str, Any], namespace: str, phase: str) -> None:
|
||||
entry["pods_total"] += 1
|
||||
phase_key = _NODE_PHASE_KEYS.get(phase)
|
||||
if phase_key:
|
||||
entry[phase_key] += 1
|
||||
if namespace:
|
||||
namespaces = entry["namespaces"]
|
||||
namespaces[namespace] = namespaces.get(namespace, 0) + 1
|
||||
|
||||
|
||||
def _node_pod_finalize(nodes: dict[str, dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in nodes.values():
|
||||
namespaces = entry.get("namespaces") or {}
|
||||
if isinstance(namespaces, dict):
|
||||
entry["namespaces_top"] = sorted(
|
||||
namespaces.items(), key=lambda item: (-item[1], item[0])
|
||||
)[:3]
|
||||
output.append(entry)
|
||||
output.sort(key=lambda item: (-item.get("pods_total", 0), item.get("node") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _node_pods_top(node_pods: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in node_pods[:limit]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"node": entry.get("node"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"namespaces_top": entry.get("namespaces_top") or [],
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _record_pending_pod(pending_oldest: list[dict[str, Any]], info: dict[str, Any]) -> bool:
|
||||
age_hours = info.get("age_hours")
|
||||
if age_hours is None:
|
||||
return False
|
||||
pending_oldest.append(info)
|
||||
return age_hours >= _PENDING_15M_HOURS
|
||||
|
||||
|
||||
def _update_pod_issue(pod: dict[str, Any], acc: dict[str, Any]) -> None:
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
status = pod.get("status") if isinstance(pod.get("status"), dict) else {}
|
||||
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
created_at = (
|
||||
metadata.get("creationTimestamp")
|
||||
if isinstance(metadata.get("creationTimestamp"), str)
|
||||
else ""
|
||||
)
|
||||
age_hours = _age_hours(created_at)
|
||||
if not name or not namespace:
|
||||
return
|
||||
phase = status.get("phase") if isinstance(status.get("phase"), str) else ""
|
||||
restarts = 0
|
||||
waiting_reasons: list[str] = []
|
||||
for container in status.get("containerStatuses") or []:
|
||||
if not isinstance(container, dict):
|
||||
continue
|
||||
restarts += int(container.get("restartCount") or 0)
|
||||
state = container.get("state") if isinstance(container.get("state"), dict) else {}
|
||||
waiting = state.get("waiting") if isinstance(state.get("waiting"), dict) else {}
|
||||
reason = waiting.get("reason")
|
||||
if isinstance(reason, str) and reason:
|
||||
waiting_reasons.append(reason)
|
||||
acc["waiting_reasons"][reason] = acc["waiting_reasons"].get(reason, 0) + 1
|
||||
phase_reason = status.get("reason")
|
||||
if isinstance(phase_reason, str) and phase_reason:
|
||||
acc["phase_reasons"][phase_reason] = acc["phase_reasons"].get(phase_reason, 0) + 1
|
||||
if phase in acc["counts"]:
|
||||
acc["counts"][phase] += 1
|
||||
if phase in _PHASE_SEVERITY or restarts > 0:
|
||||
acc["items"].append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pod": name,
|
||||
"node": spec.get("nodeName") or "",
|
||||
"phase": phase,
|
||||
"reason": status.get("reason") or "",
|
||||
"restarts": restarts,
|
||||
"waiting_reasons": sorted(set(waiting_reasons)),
|
||||
"created_at": created_at,
|
||||
"age_hours": age_hours,
|
||||
}
|
||||
)
|
||||
if phase == "Pending":
|
||||
info = {
|
||||
"namespace": namespace,
|
||||
"pod": name,
|
||||
"node": spec.get("nodeName") or "",
|
||||
"age_hours": age_hours,
|
||||
"reason": status.get("reason") or "",
|
||||
}
|
||||
if _record_pending_pod(acc["pending_oldest"], info):
|
||||
acc["pending_over_15m"] += 1
|
||||
|
||||
|
||||
def _summarize_pod_issues(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
acc = {
|
||||
"items": [],
|
||||
"counts": {key: 0 for key in _PHASE_SEVERITY},
|
||||
"pending_oldest": [],
|
||||
"pending_over_15m": 0,
|
||||
"waiting_reasons": {},
|
||||
"phase_reasons": {},
|
||||
}
|
||||
for pod in _items(payload):
|
||||
if isinstance(pod, dict):
|
||||
_update_pod_issue(pod, acc)
|
||||
items = acc["items"]
|
||||
items.sort(
|
||||
key=lambda item: (
|
||||
-_PHASE_SEVERITY.get(item.get("phase") or "", 0),
|
||||
-(item.get("restarts") or 0),
|
||||
item.get("namespace") or "",
|
||||
item.get("pod") or "",
|
||||
)
|
||||
)
|
||||
pending_oldest = acc["pending_oldest"]
|
||||
pending_oldest.sort(key=lambda item: -(item.get("age_hours") or 0.0))
|
||||
return {
|
||||
"counts": acc["counts"],
|
||||
"items": items[:20],
|
||||
"pending_oldest": pending_oldest[:10],
|
||||
"pending_over_15m": acc["pending_over_15m"],
|
||||
"waiting_reasons": acc["waiting_reasons"],
|
||||
"phase_reasons": acc["phase_reasons"],
|
||||
}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
104
ariadne/services/cluster_state_profiles.py
Normal file
104
ariadne/services/cluster_state_profiles.py
Normal file
@ -0,0 +1,104 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
|
||||
ProfileRows = list[dict[str, Any]]
|
||||
NodeWorkloadMap = dict[str, dict[str, int]]
|
||||
|
||||
|
||||
def _node_profiles(node_context: ProfileRows, node_pods: ProfileRows, node_workloads: NodeWorkloadMap) -> ProfileRows:
|
||||
pod_map = {entry.get("node"): entry for entry in node_pods if isinstance(entry, dict)}
|
||||
workload_map = node_workloads or {}
|
||||
profiles: list[dict[str, Any]] = []
|
||||
for entry in node_context:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
if not isinstance(node, str) or not node:
|
||||
continue
|
||||
pods = pod_map.get(node, {})
|
||||
workloads = workload_map.get(node, {})
|
||||
workloads_top = sorted(workloads.items(), key=lambda item: (-item[1], item[0]))[:_NODE_WORKLOAD_TOP]
|
||||
profiles.append(
|
||||
{
|
||||
"node": node,
|
||||
"ready": entry.get("ready"),
|
||||
"hardware": entry.get("hardware"),
|
||||
"arch": entry.get("arch"),
|
||||
"roles": entry.get("roles"),
|
||||
"pods_total": pods.get("pods_total"),
|
||||
"pods_running": pods.get("pods_running"),
|
||||
"namespaces_top": pods.get("namespaces_top") or [],
|
||||
"workloads_top": workloads_top,
|
||||
"load_index": entry.get("load_index"),
|
||||
"cpu": entry.get("cpu"),
|
||||
"ram": entry.get("ram"),
|
||||
"net": entry.get("net"),
|
||||
"io": entry.get("io"),
|
||||
"disk": entry.get("disk"),
|
||||
"baseline_delta": entry.get("baseline_delta") or {},
|
||||
}
|
||||
)
|
||||
profiles.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
|
||||
return profiles[:_PROFILE_LIMIT]
|
||||
|
||||
|
||||
def _namespace_profiles(namespace_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
entries = [entry for entry in namespace_context if isinstance(entry, dict)]
|
||||
entries.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or ""))
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in entries[:_PROFILE_LIMIT]:
|
||||
output.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"primary_node": entry.get("primary_node"),
|
||||
"nodes_top": entry.get("nodes_top") or [],
|
||||
"cpu_usage": entry.get("cpu_usage"),
|
||||
"mem_usage": entry.get("mem_usage"),
|
||||
"cpu_ratio": entry.get("cpu_ratio"),
|
||||
"mem_ratio": entry.get("mem_ratio"),
|
||||
"baseline_delta": entry.get("baseline_delta") or {},
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _workload_profiles(workloads: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
entries = [entry for entry in workloads if isinstance(entry, dict)]
|
||||
entries.sort(
|
||||
key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
|
||||
)
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in entries[:_PROFILE_LIMIT]:
|
||||
nodes = entry.get("nodes")
|
||||
nodes_top = (
|
||||
sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||
if isinstance(nodes, dict)
|
||||
else []
|
||||
)
|
||||
output.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"workload": entry.get("workload"),
|
||||
"source": entry.get("source"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"primary_node": entry.get("primary_node"),
|
||||
"nodes_top": nodes_top,
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _build_profiles(node_context: ProfileRows, namespace_context: ProfileRows, node_pods: ProfileRows, workloads: ProfileRows, node_workloads: NodeWorkloadMap) -> dict[str, Any]:
|
||||
return {
|
||||
"nodes": _node_profiles(node_context, node_pods, node_workloads),
|
||||
"namespaces": _namespace_profiles(namespace_context),
|
||||
"workloads": _workload_profiles(workloads),
|
||||
}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
429
ariadne/services/cluster_state_relationships.py
Normal file
429
ariadne/services/cluster_state_relationships.py
Normal file
@ -0,0 +1,429 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
|
||||
def _vector_to_named(entries: list[dict[str, Any]], label_key: str, name_key: str) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
value = item.get("value")
|
||||
label = metric.get(label_key) if isinstance(metric, dict) else None
|
||||
if not isinstance(label, str) or not label:
|
||||
continue
|
||||
output.append({name_key: label, "value": value, "metric": metric})
|
||||
output.sort(key=lambda item: (-(item.get("value") or 0), item.get(name_key) or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _pvc_top(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
pvc = metric.get("persistentvolumeclaim")
|
||||
if not isinstance(namespace, str) or not isinstance(pvc, str):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pvc": pvc,
|
||||
"used_percent": item.get("value"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("used_percent") or 0), item.get("namespace") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _namespace_context(namespace_pods: list[dict[str, Any]], namespace_nodes: list[dict[str, Any]], namespace_capacity: list[dict[str, Any]], namespace_baseline: dict[str, dict[str, dict[str, float]]]) -> list[dict[str, Any]]:
|
||||
node_map = {entry.get("namespace"): entry for entry in namespace_nodes if isinstance(entry, dict)}
|
||||
cap_map = {entry.get("namespace"): entry for entry in namespace_capacity if isinstance(entry, dict)}
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in namespace_pods:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
namespace = entry.get("namespace")
|
||||
if not isinstance(namespace, str) or not namespace:
|
||||
continue
|
||||
nodes_entry = node_map.get(namespace, {})
|
||||
cap_entry = cap_map.get(namespace, {})
|
||||
nodes = nodes_entry.get("nodes") if isinstance(nodes_entry.get("nodes"), dict) else {}
|
||||
top_nodes: list[dict[str, Any]] = []
|
||||
if isinstance(nodes, dict):
|
||||
top_nodes = [
|
||||
{"node": name, "pods": count}
|
||||
for name, count in sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:3]
|
||||
]
|
||||
baseline = namespace_baseline.get(namespace, {}) if isinstance(namespace_baseline, dict) else {}
|
||||
delta_cpu = _baseline_delta(cap_entry.get("cpu_usage"), baseline.get("cpu", {}))
|
||||
delta_mem = _baseline_delta(cap_entry.get("mem_usage"), baseline.get("mem", {}))
|
||||
baseline_delta = {k: v for k, v in (("cpu", delta_cpu), ("mem", delta_mem)) if v is not None}
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"pods_pending": entry.get("pods_pending"),
|
||||
"pods_failed": entry.get("pods_failed"),
|
||||
"pods_succeeded": entry.get("pods_succeeded"),
|
||||
"primary_node": nodes_entry.get("primary_node"),
|
||||
"nodes_top": top_nodes,
|
||||
"cpu_usage": cap_entry.get("cpu_usage"),
|
||||
"cpu_requests": cap_entry.get("cpu_requests"),
|
||||
"cpu_ratio": cap_entry.get("cpu_usage_ratio"),
|
||||
"mem_usage": cap_entry.get("mem_usage"),
|
||||
"mem_requests": cap_entry.get("mem_requests"),
|
||||
"mem_ratio": cap_entry.get("mem_usage_ratio"),
|
||||
"baseline_delta": baseline_delta,
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _namespace_nodes_top(namespace_context: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in namespace_context[:limit]:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"primary_node": entry.get("primary_node"),
|
||||
"nodes_top": entry.get("nodes_top") or [],
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _workload_nodes_top(workloads: list[dict[str, Any]], limit: int = 5) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
entries = [w for w in workloads if isinstance(w, dict)]
|
||||
entries.sort(
|
||||
key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
|
||||
)
|
||||
for entry in entries[:limit]:
|
||||
output.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"workload": entry.get("workload"),
|
||||
"source": entry.get("source"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"primary_node": entry.get("primary_node"),
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _node_workload_map(workloads: list[dict[str, Any]]) -> dict[str, dict[str, int]]:
|
||||
mapping: dict[str, dict[str, int]] = {}
|
||||
for entry in workloads:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
namespace = entry.get("namespace")
|
||||
workload = entry.get("workload")
|
||||
if not isinstance(workload, str) or not workload:
|
||||
continue
|
||||
nodes = entry.get("nodes")
|
||||
if not isinstance(nodes, dict):
|
||||
continue
|
||||
key = f"{namespace}/{workload}" if isinstance(namespace, str) and namespace else workload
|
||||
for node, count in nodes.items():
|
||||
if not isinstance(node, str) or not node:
|
||||
continue
|
||||
if not isinstance(count, int):
|
||||
try:
|
||||
count = int(count)
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if count <= 0:
|
||||
continue
|
||||
mapping.setdefault(node, {})[key] = mapping.setdefault(node, {}).get(key, 0) + count
|
||||
return mapping
|
||||
|
||||
|
||||
def _node_workloads_top(workload_map: dict[str, dict[str, int]], limit_nodes: int = _NODE_WORKLOAD_LIMIT, limit_workloads: int = _NODE_WORKLOAD_TOP) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for node, workloads in workload_map.items():
|
||||
if not isinstance(node, str) or not node or not isinstance(workloads, dict):
|
||||
continue
|
||||
total = sum(count for count in workloads.values() if isinstance(count, int))
|
||||
top = sorted(workloads.items(), key=lambda item: (-item[1], item[0]))[:limit_workloads]
|
||||
output.append({"node": node, "pods_total": total, "workloads_top": top})
|
||||
output.sort(key=lambda item: (-(item.get("pods_total") or 0), item.get("node") or ""))
|
||||
return output[:limit_nodes]
|
||||
|
||||
|
||||
def _workload_index(workloads: list[dict[str, Any]], limit: int = _WORKLOAD_INDEX_LIMIT) -> list[dict[str, Any]]:
|
||||
entries = [entry for entry in workloads if isinstance(entry, dict)]
|
||||
entries.sort(
|
||||
key=lambda item: (-(item.get("pods_total") or 0), item.get("namespace") or "", item.get("workload") or ""),
|
||||
)
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in entries[:limit]:
|
||||
nodes = entry.get("nodes") if isinstance(entry.get("nodes"), dict) else {}
|
||||
nodes_top = (
|
||||
sorted(nodes.items(), key=lambda item: (-item[1], item[0]))[:_NODE_WORKLOAD_TOP]
|
||||
if isinstance(nodes, dict)
|
||||
else []
|
||||
)
|
||||
output.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"workload": entry.get("workload"),
|
||||
"pods_total": entry.get("pods_total"),
|
||||
"pods_running": entry.get("pods_running"),
|
||||
"primary_node": entry.get("primary_node"),
|
||||
"nodes_top": nodes_top,
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _events_summary(events: dict[str, Any]) -> dict[str, Any]:
|
||||
if not isinstance(events, dict):
|
||||
return {}
|
||||
by_namespace = events.get("warnings_by_namespace") if isinstance(events.get("warnings_by_namespace"), dict) else {}
|
||||
top_namespace = ""
|
||||
top_namespace_count = 0
|
||||
if by_namespace:
|
||||
top_namespace, top_namespace_count = sorted(
|
||||
by_namespace.items(), key=lambda item: (-item[1], item[0])
|
||||
)[0]
|
||||
return {
|
||||
"warnings_total": events.get("warnings_total"),
|
||||
"top_reason": events.get("warnings_top_reason"),
|
||||
"top_namespace": {"namespace": top_namespace, "count": top_namespace_count},
|
||||
"latest": events.get("warnings_latest"),
|
||||
"recent": (events.get("warnings_recent") or [])[:_EVENTS_SUMMARY_LIMIT],
|
||||
}
|
||||
|
||||
|
||||
def _build_lexicon() -> dict[str, Any]:
|
||||
terms = [
|
||||
{
|
||||
"term": "hottest",
|
||||
"meaning": "highest utilization for a metric (cpu, ram, net, io, load_index).",
|
||||
},
|
||||
{
|
||||
"term": "pressure",
|
||||
"meaning": "node condition flags (MemoryPressure, DiskPressure, PIDPressure, NetworkUnavailable).",
|
||||
},
|
||||
{
|
||||
"term": "load_index",
|
||||
"meaning": "composite load score derived from cpu, ram, net, io.",
|
||||
},
|
||||
{"term": "top", "meaning": "highest values within a category."},
|
||||
{"term": "pods", "meaning": "running workload instances on a node or namespace."},
|
||||
{"term": "workload", "meaning": "deployment/statefulset/daemonset grouping."},
|
||||
]
|
||||
aliases = {
|
||||
"hot node": "node with highest load_index",
|
||||
"hottest by cpu": "node with highest cpu utilization",
|
||||
"hottest by ram": "node with highest ram utilization",
|
||||
"pressure node": "node with pressure condition flags",
|
||||
}
|
||||
return {"terms": terms, "aliases": aliases}
|
||||
|
||||
|
||||
def _top_named_entries(entries: list[dict[str, Any]], name_key: str, limit: int) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in entries or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = entry.get(name_key)
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
value = entry.get("value")
|
||||
try:
|
||||
numeric = float(value)
|
||||
except (TypeError, ValueError):
|
||||
numeric = 0.0
|
||||
output.append({"name": name, "value": numeric})
|
||||
output.sort(key=lambda item: -(item.get("value") or 0))
|
||||
return output[:limit]
|
||||
|
||||
|
||||
def _cross_node_metric_top(metrics: dict[str, Any], node_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
usage = metrics.get("node_usage") if isinstance(metrics.get("node_usage"), dict) else {}
|
||||
node_map = {entry.get("node"): entry for entry in node_context if isinstance(entry, dict)}
|
||||
output: list[dict[str, Any]] = []
|
||||
for metric in ("cpu", "ram", "net", "io", "disk"):
|
||||
series = usage.get(metric)
|
||||
if not isinstance(series, list):
|
||||
continue
|
||||
for top in _top_named_entries(series, "node", _CROSS_NODE_TOP):
|
||||
node = top.get("name")
|
||||
if not node:
|
||||
continue
|
||||
context = node_map.get(node, {})
|
||||
output.append(
|
||||
{
|
||||
"metric": metric,
|
||||
"node": node,
|
||||
"value": top.get("value"),
|
||||
"cpu": context.get("cpu"),
|
||||
"ram": context.get("ram"),
|
||||
"net": context.get("net"),
|
||||
"io": context.get("io"),
|
||||
"disk": context.get("disk"),
|
||||
"load_index": context.get("load_index"),
|
||||
"pods_total": context.get("pods_total"),
|
||||
"hardware": context.get("hardware"),
|
||||
"roles": context.get("roles"),
|
||||
"pressure_flags": context.get("pressure_flags"),
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _cross_namespace_metric_top(metrics: dict[str, Any], namespace_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
top = metrics.get("namespace_top") if isinstance(metrics.get("namespace_top"), dict) else {}
|
||||
namespace_map = {
|
||||
entry.get("namespace"): entry
|
||||
for entry in namespace_context
|
||||
if isinstance(entry, dict) and entry.get("namespace")
|
||||
}
|
||||
output: list[dict[str, Any]] = []
|
||||
for metric in ("cpu", "mem", "net", "io", "restarts"):
|
||||
series = top.get(metric)
|
||||
if not isinstance(series, list):
|
||||
continue
|
||||
for entry in _top_named_entries(series, "namespace", _CROSS_NAMESPACE_TOP):
|
||||
namespace = entry.get("name")
|
||||
if not namespace:
|
||||
continue
|
||||
context = namespace_map.get(namespace, {})
|
||||
output.append(
|
||||
{
|
||||
"metric": metric,
|
||||
"namespace": namespace,
|
||||
"value": entry.get("value"),
|
||||
"pods_total": context.get("pods_total"),
|
||||
"pods_running": context.get("pods_running"),
|
||||
"cpu_ratio": context.get("cpu_ratio"),
|
||||
"mem_ratio": context.get("mem_ratio"),
|
||||
"primary_node": context.get("primary_node"),
|
||||
"nodes_top": context.get("nodes_top") or [],
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _build_cross_stats(metrics: dict[str, Any], node_context: list[dict[str, Any]], namespace_context: list[dict[str, Any]], workloads: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
return {
|
||||
"node_metric_top": _cross_node_metric_top(metrics, node_context),
|
||||
"namespace_metric_top": _cross_namespace_metric_top(metrics, namespace_context),
|
||||
"pvc_top": _pvc_top(metrics.get("pvc_usage_top", []))[:_CROSS_PVC_TOP],
|
||||
"workload_top": _workload_nodes_top(workloads, _CROSS_NAMESPACE_TOP),
|
||||
}
|
||||
|
||||
|
||||
def _node_context(node_details: list[dict[str, Any]], node_load: list[dict[str, Any]], node_baseline: dict[str, dict[str, dict[str, float]]], node_workloads: dict[str, dict[str, int]]) -> list[dict[str, Any]]:
|
||||
load_map = {entry.get("node"): entry for entry in node_load if isinstance(entry, dict)}
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in node_details:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
name = entry.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
load_entry = load_map.get(name, {})
|
||||
baseline = node_baseline.get(name, {}) if isinstance(node_baseline, dict) else {}
|
||||
deltas: dict[str, float] = {}
|
||||
for key in ("cpu", "ram", "net", "io", "disk"):
|
||||
current = load_entry.get(key)
|
||||
stats = baseline.get(key, {}) if isinstance(baseline, dict) else {}
|
||||
delta = _baseline_delta(current, stats)
|
||||
if delta is not None:
|
||||
deltas[key] = delta
|
||||
workloads = node_workloads.get(name, {}) if isinstance(node_workloads, dict) else {}
|
||||
workloads_top = sorted(workloads.items(), key=lambda item: (-item[1], item[0]))[:_NODE_WORKLOAD_TOP]
|
||||
output.append(
|
||||
{
|
||||
"node": name,
|
||||
"ready": entry.get("ready"),
|
||||
"roles": entry.get("roles"),
|
||||
"is_worker": entry.get("is_worker"),
|
||||
"hardware": entry.get("hardware"),
|
||||
"arch": entry.get("arch"),
|
||||
"os": entry.get("os"),
|
||||
"taints": entry.get("taints"),
|
||||
"unschedulable": entry.get("unschedulable"),
|
||||
"pressure_flags": entry.get("pressure"),
|
||||
"pods_total": load_entry.get("pods_total"),
|
||||
"cpu": load_entry.get("cpu"),
|
||||
"ram": load_entry.get("ram"),
|
||||
"disk": load_entry.get("disk"),
|
||||
"net": load_entry.get("net"),
|
||||
"io": load_entry.get("io"),
|
||||
"load_index": load_entry.get("load_index"),
|
||||
"baseline": baseline,
|
||||
"baseline_delta": deltas,
|
||||
"workloads_top": workloads_top,
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _baseline_delta(current: Any, stats: dict[str, Any]) -> float | None:
|
||||
if not isinstance(current, (int, float)):
|
||||
return None
|
||||
avg = stats.get("avg")
|
||||
if not isinstance(avg, (int, float)) or avg == 0:
|
||||
return None
|
||||
return round(((float(current) - float(avg)) / float(avg)) * 100, 2)
|
||||
|
||||
|
||||
def _delta_severity(delta: float) -> str:
|
||||
magnitude = abs(delta)
|
||||
if magnitude >= _BASELINE_DELTA_CRIT:
|
||||
return "critical"
|
||||
if magnitude >= _BASELINE_DELTA_WARN:
|
||||
return "warning"
|
||||
return "info"
|
||||
|
||||
|
||||
def _delta_entry_label(entry: dict[str, Any]) -> tuple[str, str]:
|
||||
if "node" in entry:
|
||||
return ("node", str(entry.get("node") or ""))
|
||||
return ("namespace", str(entry.get("namespace") or ""))
|
||||
|
||||
|
||||
def _delta_top(entries: list[dict[str, Any]], key: str, limit: int = _DELTA_TOP_LIMIT) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in entries:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
deltas = entry.get("baseline_delta") if isinstance(entry.get("baseline_delta"), dict) else {}
|
||||
delta = deltas.get(key)
|
||||
if not isinstance(delta, (int, float)):
|
||||
continue
|
||||
label_key, label_value = _delta_entry_label(entry)
|
||||
output.append(
|
||||
{
|
||||
label_key: label_value,
|
||||
"metric": key,
|
||||
"delta": delta,
|
||||
"severity": _delta_severity(float(delta)),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(abs(item.get("delta") or 0)), item.get("metric") or ""))
|
||||
return output[:limit]
|
||||
|
||||
|
||||
def _reason_top(counts: dict[str, Any], limit: int = _REASON_TOP_LIMIT) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for reason, value in counts.items() if isinstance(counts, dict) else []:
|
||||
if isinstance(reason, str) and reason and isinstance(value, (int, float)):
|
||||
output.append({"reason": reason, "count": int(value)})
|
||||
output.sort(key=lambda item: (-item.get("count", 0), item.get("reason") or ""))
|
||||
return output[:limit]
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
160
ariadne/services/cluster_state_signals.py
Normal file
160
ariadne/services/cluster_state_signals.py
Normal file
@ -0,0 +1,160 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_anomalies import *
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_health import *
|
||||
from .cluster_state_relationships import *
|
||||
|
||||
def _pod_issue_summary(pod_issues: dict[str, Any], metrics: dict[str, Any]) -> dict[str, Any]:
|
||||
waiting = pod_issues.get("waiting_reasons") if isinstance(pod_issues, dict) else {}
|
||||
phase = pod_issues.get("phase_reasons") if isinstance(pod_issues, dict) else {}
|
||||
return {
|
||||
"waiting_reasons_top": _reason_top(waiting),
|
||||
"phase_reasons_top": _reason_top(phase),
|
||||
"namespace_issue_top": metrics.get("namespace_issue_top") or {},
|
||||
}
|
||||
|
||||
|
||||
def _delta_hit(delta: Any) -> bool:
|
||||
if not isinstance(delta, (int, float)):
|
||||
return False
|
||||
return abs(float(delta)) >= _BASELINE_DELTA_WARN
|
||||
|
||||
|
||||
def _node_delta_signals(node_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
signals: list[dict[str, Any]] = []
|
||||
for entry in node_context:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
node = entry.get("node")
|
||||
deltas = entry.get("baseline_delta") if isinstance(entry.get("baseline_delta"), dict) else {}
|
||||
baseline = entry.get("baseline") if isinstance(entry.get("baseline"), dict) else {}
|
||||
if not isinstance(node, str) or not node:
|
||||
continue
|
||||
for metric in ("cpu", "ram", "net", "io", "disk"):
|
||||
delta = deltas.get(metric)
|
||||
if not _delta_hit(delta):
|
||||
continue
|
||||
avg = baseline.get(metric, {}).get("avg") if isinstance(baseline.get(metric), dict) else None
|
||||
signals.append(
|
||||
{
|
||||
"scope": "node",
|
||||
"target": node,
|
||||
"metric": metric,
|
||||
"current": entry.get(metric),
|
||||
"baseline_avg": avg,
|
||||
"delta_pct": delta,
|
||||
"severity": _delta_severity(float(delta)),
|
||||
}
|
||||
)
|
||||
return signals
|
||||
|
||||
|
||||
def _namespace_delta_signals(namespace_context: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
signals: list[dict[str, Any]] = []
|
||||
for entry in namespace_context:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
namespace = entry.get("namespace")
|
||||
deltas = entry.get("baseline_delta") if isinstance(entry.get("baseline_delta"), dict) else {}
|
||||
baseline = entry.get("baseline") if isinstance(entry.get("baseline"), dict) else {}
|
||||
if not isinstance(namespace, str) or not namespace:
|
||||
continue
|
||||
for metric, current_key in (("cpu", "cpu_usage"), ("mem", "mem_usage")):
|
||||
delta = deltas.get(metric)
|
||||
if not _delta_hit(delta):
|
||||
continue
|
||||
avg = baseline.get(metric, {}).get("avg") if isinstance(baseline.get(metric), dict) else None
|
||||
signals.append(
|
||||
{
|
||||
"scope": "namespace",
|
||||
"target": namespace,
|
||||
"metric": metric,
|
||||
"current": entry.get(current_key),
|
||||
"baseline_avg": avg,
|
||||
"delta_pct": delta,
|
||||
"severity": _delta_severity(float(delta)),
|
||||
}
|
||||
)
|
||||
return signals
|
||||
|
||||
|
||||
def _kustomization_signals(kustomizations: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
count = int(kustomizations.get("not_ready") or 0) if isinstance(kustomizations, dict) else 0
|
||||
if count <= 0:
|
||||
return []
|
||||
return [
|
||||
{
|
||||
"scope": "flux",
|
||||
"target": "kustomizations",
|
||||
"metric": "not_ready",
|
||||
"current": count,
|
||||
"severity": "warning",
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def _pod_issue_signals(pod_issues: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
if not isinstance(pod_issues, dict):
|
||||
return []
|
||||
signals: list[dict[str, Any]] = []
|
||||
pending_over = int(pod_issues.get("pending_over_15m") or 0)
|
||||
if pending_over > 0:
|
||||
signals.append(
|
||||
{
|
||||
"scope": "pods",
|
||||
"target": "pending_over_15m",
|
||||
"metric": "count",
|
||||
"current": pending_over,
|
||||
"severity": "warning",
|
||||
}
|
||||
)
|
||||
counts = pod_issues.get("counts") if isinstance(pod_issues.get("counts"), dict) else {}
|
||||
failed = int(counts.get("Failed") or 0) if isinstance(counts, dict) else 0
|
||||
if failed > 0:
|
||||
signals.append(
|
||||
{
|
||||
"scope": "pods",
|
||||
"target": "failed",
|
||||
"metric": "count",
|
||||
"current": failed,
|
||||
"severity": "critical",
|
||||
}
|
||||
)
|
||||
return signals
|
||||
|
||||
|
||||
def _workload_health_signals(workloads_health: dict[str, Any]) -> list[dict[str, Any]]:
|
||||
not_ready = _workload_not_ready_items(workloads_health)
|
||||
if not not_ready:
|
||||
return []
|
||||
output: list[dict[str, Any]] = []
|
||||
for entry in not_ready[:5]:
|
||||
output.append(
|
||||
{
|
||||
"scope": "workload",
|
||||
"target": f"{entry.get('namespace')}/{entry.get('workload')}",
|
||||
"metric": "not_ready",
|
||||
"current": entry.get("ready") or 0,
|
||||
"desired": entry.get("desired") or 0,
|
||||
"severity": "warning",
|
||||
}
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _build_signals(context: SignalContext) -> list[dict[str, Any]]:
|
||||
signals = (
|
||||
_node_delta_signals(context.node_context)
|
||||
+ _namespace_delta_signals(context.namespace_context)
|
||||
+ _workload_health_signals(context.workloads_health)
|
||||
+ _pod_issue_signals(context.pod_issues)
|
||||
+ _kustomization_signals(context.kustomizations)
|
||||
+ _pvc_pressure_signals(context.metrics)
|
||||
)
|
||||
signals.sort(key=lambda item: (_severity_rank(item.get("severity")), item.get("scope") or ""))
|
||||
return signals[:_SIGNAL_LIMIT]
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
309
ariadne/services/cluster_state_vm_client.py
Normal file
309
ariadne/services/cluster_state_vm_client.py
Normal file
@ -0,0 +1,309 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from typing import Any, Callable
|
||||
|
||||
import httpx
|
||||
|
||||
from ..settings import settings
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_flux_events import *
|
||||
from .cluster_state_relationships import *
|
||||
|
||||
|
||||
def _facade_override(name: str, original: Callable[..., Any]) -> Callable[..., Any] | None:
|
||||
facade = sys.modules.get("ariadne.services.cluster_state")
|
||||
candidate = getattr(facade, name, None) if facade is not None else None
|
||||
if candidate is not None and candidate is not original:
|
||||
return candidate
|
||||
return None
|
||||
|
||||
|
||||
def _vm_query(expr: str) -> list[dict[str, Any]] | None:
|
||||
base = settings.vm_url
|
||||
if not base:
|
||||
return None
|
||||
url = f"{base.rstrip('/')}/api/v1/query"
|
||||
params = {"query": expr}
|
||||
with httpx.Client(timeout=settings.cluster_state_vm_timeout_sec) as client:
|
||||
resp = client.get(url, params=params)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
if payload.get("status") != "success":
|
||||
return None
|
||||
data = payload.get("data") if isinstance(payload.get("data"), dict) else {}
|
||||
result = data.get("result")
|
||||
return result if isinstance(result, list) else None
|
||||
|
||||
|
||||
def _vm_scalar(expr: str) -> float | None:
|
||||
override = _facade_override("_vm_scalar", _vm_scalar)
|
||||
if override is not None:
|
||||
return override(expr)
|
||||
result = _vm_query(expr)
|
||||
if not result:
|
||||
return None
|
||||
value = result[0].get("value") if isinstance(result[0], dict) else None
|
||||
if not isinstance(value, list) or len(value) < _VALUE_PAIR_LEN:
|
||||
return None
|
||||
try:
|
||||
return float(value[1])
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def _vm_vector(expr: str) -> list[dict[str, Any]]:
|
||||
override = _facade_override("_vm_vector", _vm_vector)
|
||||
if override is not None:
|
||||
return override(expr)
|
||||
result = _vm_query(expr) or []
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in result:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
value = item.get("value") if isinstance(item.get("value"), list) else []
|
||||
if len(value) < _VALUE_PAIR_LEN:
|
||||
continue
|
||||
try:
|
||||
numeric = float(value[1])
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
output.append({"metric": metric, "value": numeric})
|
||||
return output
|
||||
|
||||
|
||||
def _alert_entries(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
value = item.get("value")
|
||||
name = metric.get("alertname")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
severity = metric.get("severity") if isinstance(metric.get("severity"), str) else ""
|
||||
output.append(
|
||||
{
|
||||
"alert": name,
|
||||
"severity": severity,
|
||||
"value": value,
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("value") or 0), item.get("alert") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _vm_alerts_now() -> list[dict[str, Any]]:
|
||||
entries = _vm_vector('sum by (alertname,severity) (ALERTS{alertstate="firing"})')
|
||||
return _alert_entries(entries)[:_ALERT_TOP_LIMIT]
|
||||
|
||||
|
||||
def _vm_alerts_trend(window: str) -> list[dict[str, Any]]:
|
||||
entries = _vm_vector(
|
||||
f"topk({_ALERT_TOP_LIMIT}, sum by (alertname,severity) (count_over_time(ALERTS{{alertstate=\"firing\"}}[{window}])))"
|
||||
)
|
||||
return _alert_entries(entries)
|
||||
|
||||
|
||||
def _alertmanager_alerts(errors: list[str]) -> list[dict[str, Any]]:
|
||||
base = settings.alertmanager_url
|
||||
if not base:
|
||||
return []
|
||||
url = f"{base.rstrip('/')}/api/v2/alerts"
|
||||
try:
|
||||
with httpx.Client(timeout=settings.cluster_state_vm_timeout_sec) as client:
|
||||
resp = client.get(url)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
if isinstance(payload, list):
|
||||
return [item for item in payload if isinstance(item, dict)]
|
||||
except Exception as exc:
|
||||
errors.append(f"alertmanager: {exc}")
|
||||
return []
|
||||
|
||||
|
||||
def _summarize_alerts(alerts: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
items: list[dict[str, Any]] = []
|
||||
by_severity: dict[str, int] = {}
|
||||
for alert in alerts:
|
||||
labels = alert.get("labels") if isinstance(alert.get("labels"), dict) else {}
|
||||
alertname = labels.get("alertname")
|
||||
if not isinstance(alertname, str) or not alertname:
|
||||
continue
|
||||
severity = labels.get("severity") if isinstance(labels.get("severity"), str) else ""
|
||||
items.append({"alert": alertname, "severity": severity})
|
||||
if severity:
|
||||
by_severity[severity] = by_severity.get(severity, 0) + 1
|
||||
items.sort(key=lambda item: (item.get("severity") or "", item.get("alert") or ""))
|
||||
return {
|
||||
"total": len(items),
|
||||
"by_severity": by_severity,
|
||||
"items": items[:_ALERT_TOP_LIMIT],
|
||||
}
|
||||
|
||||
|
||||
def _filter_namespace_vector(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
if not isinstance(namespace, str) or not namespace:
|
||||
continue
|
||||
if namespace in _SYSTEM_NAMESPACES:
|
||||
continue
|
||||
output.append(item)
|
||||
return output
|
||||
|
||||
|
||||
def _vm_topk(expr: str, label_key: str) -> dict[str, Any] | None:
|
||||
result = _vm_vector(expr)
|
||||
if not result:
|
||||
return None
|
||||
metric = result[0].get("metric") if isinstance(result[0], dict) else {}
|
||||
value = result[0].get("value")
|
||||
label = metric.get(label_key) if isinstance(metric, dict) else None
|
||||
return {"label": label or "", "value": value, "metric": metric}
|
||||
|
||||
|
||||
def _vm_node_metric(expr: str, label_key: str) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in _vm_vector(expr):
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
label = metric.get(label_key)
|
||||
value = item.get("value")
|
||||
if isinstance(label, str) and label:
|
||||
output.append({"node": label, "value": value})
|
||||
output.sort(key=lambda item: item.get("node") or "")
|
||||
return output
|
||||
|
||||
|
||||
def _vm_baseline_map(expr: str, label_key: str, window: str) -> dict[str, dict[str, float]]:
|
||||
averages = _vm_vector(f"avg_over_time(({expr})[{window}])")
|
||||
maximums = _vm_vector(f"max_over_time(({expr})[{window}])")
|
||||
baseline: dict[str, dict[str, float]] = {}
|
||||
for item in averages:
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
label = metric.get(label_key)
|
||||
if not isinstance(label, str) or not label:
|
||||
continue
|
||||
baseline.setdefault(label, {})["avg"] = float(item.get("value") or 0)
|
||||
for item in maximums:
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
label = metric.get(label_key)
|
||||
if not isinstance(label, str) or not label:
|
||||
continue
|
||||
baseline.setdefault(label, {})["max"] = float(item.get("value") or 0)
|
||||
return baseline
|
||||
|
||||
|
||||
def _baseline_map_to_list(baseline: dict[str, dict[str, float]], name_key: str) -> list[dict[str, Any]]:
|
||||
output: list[dict[str, Any]] = []
|
||||
for name, stats in baseline.items():
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
name_key: name,
|
||||
"avg": stats.get("avg"),
|
||||
"max": stats.get("max"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("avg") or 0), item.get(name_key) or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _limit_entries(entries: list[dict[str, Any]], limit: int) -> list[dict[str, Any]]:
|
||||
if limit <= 0:
|
||||
return []
|
||||
return entries[:limit]
|
||||
|
||||
|
||||
def _vm_window_series(expr: str, label_key: str, name_key: str, window: str) -> dict[str, list[dict[str, Any]]]:
|
||||
avg = _vector_to_named(
|
||||
_vm_vector(f"avg_over_time(({expr})[{window}])"),
|
||||
label_key,
|
||||
name_key,
|
||||
)
|
||||
max_values = _vector_to_named(
|
||||
_vm_vector(f"max_over_time(({expr})[{window}])"),
|
||||
label_key,
|
||||
name_key,
|
||||
)
|
||||
p95 = _vector_to_named(
|
||||
_vm_vector(f"quantile_over_time(0.95, ({expr})[{window}])"),
|
||||
label_key,
|
||||
name_key,
|
||||
)
|
||||
return {"avg": avg, "max": max_values, "p95": p95}
|
||||
|
||||
|
||||
def _trim_window_series(series: dict[str, list[dict[str, Any]]], limit: int) -> dict[str, list[dict[str, Any]]]:
|
||||
return {key: _limit_entries(entries, limit) for key, entries in series.items()}
|
||||
|
||||
|
||||
def _build_metric_trends(exprs: dict[str, str], label_key: str, name_key: str, windows: tuple[str, ...], limit: int) -> dict[str, dict[str, dict[str, list[dict[str, Any]]]]]:
|
||||
trends: dict[str, dict[str, dict[str, list[dict[str, Any]]]]] = {}
|
||||
for metric, expr in exprs.items():
|
||||
metric_trends: dict[str, dict[str, list[dict[str, Any]]]] = {}
|
||||
for window in windows:
|
||||
series = _vm_window_series(expr, label_key, name_key, window)
|
||||
metric_trends[window] = _trim_window_series(series, limit)
|
||||
trends[metric] = metric_trends
|
||||
return trends
|
||||
|
||||
|
||||
def _vm_scalar_window(expr: str, window: str, fn: str) -> float | None:
|
||||
return _vm_scalar(f"{fn}(({expr})[{window}])")
|
||||
|
||||
|
||||
def _scalar_trends(expr: str, windows: tuple[str, ...]) -> dict[str, dict[str, float | None]]:
|
||||
return {
|
||||
window: {
|
||||
"avg": _vm_scalar_window(expr, window, "avg_over_time"),
|
||||
"min": _vm_scalar_window(expr, window, "min_over_time"),
|
||||
"max": _vm_scalar_window(expr, window, "max_over_time"),
|
||||
}
|
||||
for window in windows
|
||||
}
|
||||
|
||||
|
||||
def _cluster_trends() -> dict[str, dict[str, dict[str, float | None]]]:
|
||||
exprs = {
|
||||
"nodes_ready": 'sum(kube_node_status_condition{condition="Ready",status="true"})',
|
||||
"nodes_not_ready": 'sum(kube_node_status_condition{condition="Ready",status="false"})',
|
||||
"pods_running": 'sum(kube_pod_status_phase{phase="Running"})',
|
||||
"pods_pending": 'sum(kube_pod_status_phase{phase="Pending"})',
|
||||
"pods_failed": 'sum(kube_pod_status_phase{phase="Failed"})',
|
||||
"pods_succeeded": 'sum(kube_pod_status_phase{phase="Succeeded"})',
|
||||
"alerts_firing": 'sum(ALERTS{alertstate="firing"})',
|
||||
"cpu_usage": f'sum(rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}]))',
|
||||
"mem_usage": 'sum(container_memory_working_set_bytes{namespace!=""})',
|
||||
"net_io": (
|
||||
f'sum(rate(container_network_receive_bytes_total{{namespace!=""}}[{_RATE_WINDOW}]) '
|
||||
f'+ rate(container_network_transmit_bytes_total{{namespace!=""}}[{_RATE_WINDOW}]))'
|
||||
),
|
||||
"fs_io": (
|
||||
f'sum(rate(container_fs_reads_bytes_total{{namespace!=""}}[{_RATE_WINDOW}]) '
|
||||
f'+ rate(container_fs_writes_bytes_total{{namespace!=""}}[{_RATE_WINDOW}]))'
|
||||
),
|
||||
}
|
||||
return {key: _scalar_trends(expr, _TREND_WINDOWS) for key, expr in exprs.items()}
|
||||
|
||||
|
||||
def _node_condition_trends() -> dict[str, dict[str, dict[str, float | None]]]:
|
||||
conditions = {
|
||||
"ready": 'sum(kube_node_status_condition{condition="Ready",status="true"})',
|
||||
"not_ready": 'sum(kube_node_status_condition{condition="Ready",status="false"})',
|
||||
"unschedulable": "sum(kube_node_spec_unschedulable)",
|
||||
}
|
||||
for cond in _PRESSURE_TYPES:
|
||||
conditions[cond.lower()] = (
|
||||
f'sum(kube_node_status_condition{{condition="{cond}",status="true"}})'
|
||||
)
|
||||
return {key: _scalar_trends(expr, _TREND_WINDOWS) for key, expr in conditions.items()}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
187
ariadne/services/cluster_state_vm_trends.py
Normal file
187
ariadne/services/cluster_state_vm_trends.py
Normal file
@ -0,0 +1,187 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_relationships import *
|
||||
from .cluster_state_vm_client import *
|
||||
|
||||
def _pod_reason_totals(
|
||||
reasons: dict[str, str],
|
||||
series: str,
|
||||
) -> dict[str, dict[str, dict[str, float | None]]]:
|
||||
totals: dict[str, dict[str, dict[str, float | None]]] = {}
|
||||
for key, reason in reasons.items():
|
||||
expr = f'sum({series}{{reason="{reason}"}})'
|
||||
totals[key] = _scalar_trends(expr, _TREND_WINDOWS)
|
||||
return totals
|
||||
|
||||
|
||||
def _node_usage_exprs() -> dict[str, str]:
|
||||
return {
|
||||
"cpu": (
|
||||
f'avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
|
||||
'* on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
|
||||
),
|
||||
"ram": (
|
||||
'avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
||||
'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
|
||||
),
|
||||
"net": (
|
||||
f'avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
|
||||
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) '
|
||||
'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
|
||||
),
|
||||
"io": (
|
||||
f'avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) + rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
|
||||
'* on(instance) group_left(node) label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
|
||||
),
|
||||
"disk": (
|
||||
'avg by (node) (((1 - avg by (instance) (node_filesystem_avail_bytes{mountpoint="/",fstype!~"tmpfs|overlay"} '
|
||||
'/ node_filesystem_size_bytes{mountpoint="/",fstype!~"tmpfs|overlay"})) * 100) * on(instance) group_left(node) '
|
||||
'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _namespace_usage_exprs() -> dict[str, str]:
|
||||
return {
|
||||
"cpu": f'sum by (namespace) (rate(container_cpu_usage_seconds_total{{namespace!=""}}[{_RATE_WINDOW}]))',
|
||||
"mem": 'sum by (namespace) (container_memory_working_set_bytes{namespace!=""})',
|
||||
}
|
||||
|
||||
|
||||
def _namespace_request_exprs() -> dict[str, str]:
|
||||
return {
|
||||
"cpu_requests": "sum by (namespace) (kube_pod_container_resource_requests_cpu_cores)",
|
||||
"mem_requests": "sum by (namespace) (kube_pod_container_resource_requests_memory_bytes)",
|
||||
}
|
||||
|
||||
|
||||
def _restart_namespace_trend(window: str) -> list[dict[str, Any]]:
|
||||
entries = _vm_vector(
|
||||
f"topk({_TREND_NAMESPACE_LIMIT}, sum by (namespace) (increase(kube_pod_container_status_restarts_total[{window}])))"
|
||||
)
|
||||
entries = _filter_namespace_vector(entries)
|
||||
return _vector_to_named(entries, "namespace", "namespace")
|
||||
|
||||
|
||||
def _job_failure_trend(window: str) -> list[dict[str, Any]]:
|
||||
entries = _vm_vector(
|
||||
f"topk({_TREND_JOB_LIMIT}, sum by (namespace,job_name) (increase(kube_job_status_failed[{window}])))"
|
||||
)
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
job = metric.get("job_name")
|
||||
if not isinstance(namespace, str) or not isinstance(job, str):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"job": job,
|
||||
"value": item.get("value"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("value") or 0), item.get("namespace") or "", item.get("job") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _pod_reason_entries(expr: str, limit: int) -> list[dict[str, Any]]:
|
||||
entries = _vm_vector(f"topk({limit}, sum by (namespace,pod) ({expr}))")
|
||||
output: list[dict[str, Any]] = []
|
||||
for item in entries:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
pod = metric.get("pod")
|
||||
if not isinstance(namespace, str) or not isinstance(pod, str):
|
||||
continue
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"pod": pod,
|
||||
"value": item.get("value"),
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("value") or 0), item.get("namespace") or "", item.get("pod") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _namespace_reason_entries(expr: str, limit: int) -> list[dict[str, Any]]:
|
||||
entries = _vm_vector(f"topk({limit}, sum by (namespace) ({expr}))")
|
||||
entries = _filter_namespace_vector(entries)
|
||||
return _vector_to_named(entries, "namespace", "namespace")
|
||||
|
||||
|
||||
def _pod_waiting_now() -> dict[str, list[dict[str, Any]]]:
|
||||
output: dict[str, list[dict[str, Any]]] = {}
|
||||
for key, reason in _POD_WAITING_REASONS.items():
|
||||
expr = f'kube_pod_container_status_waiting_reason{{reason="{reason}"}}'
|
||||
output[key] = _pod_reason_entries(expr, _POD_REASON_LIMIT)
|
||||
return output
|
||||
|
||||
|
||||
def _pod_waiting_trends() -> dict[str, dict[str, list[dict[str, Any]]]]:
|
||||
trends: dict[str, dict[str, list[dict[str, Any]]]] = {}
|
||||
for key, reason in _POD_WAITING_REASONS.items():
|
||||
expr = f'kube_pod_container_status_waiting_reason{{reason="{reason}"}}'
|
||||
trends[key] = {
|
||||
window: _pod_reason_entries(f"max_over_time(({expr})[{window}])", _POD_REASON_TREND_LIMIT)
|
||||
for window in _TREND_WINDOWS
|
||||
}
|
||||
return trends
|
||||
|
||||
|
||||
def _pod_terminated_now() -> dict[str, list[dict[str, Any]]]:
|
||||
output: dict[str, list[dict[str, Any]]] = {}
|
||||
for key, reason in _POD_TERMINATED_REASONS.items():
|
||||
expr = f'kube_pod_container_status_terminated_reason{{reason="{reason}"}}'
|
||||
output[key] = _pod_reason_entries(expr, _POD_REASON_LIMIT)
|
||||
return output
|
||||
|
||||
|
||||
def _pod_terminated_trends() -> dict[str, dict[str, list[dict[str, Any]]]]:
|
||||
trends: dict[str, dict[str, list[dict[str, Any]]]] = {}
|
||||
for key, reason in _POD_TERMINATED_REASONS.items():
|
||||
expr = f'kube_pod_container_status_terminated_reason{{reason="{reason}"}}'
|
||||
trends[key] = {
|
||||
window: _pod_reason_entries(f"max_over_time(({expr})[{window}])", _POD_REASON_TREND_LIMIT)
|
||||
for window in _TREND_WINDOWS
|
||||
}
|
||||
return trends
|
||||
|
||||
|
||||
def _pods_phase_trends() -> dict[str, dict[str, dict[str, float | None]]]:
|
||||
phases = {
|
||||
"running": "sum(kube_pod_status_phase{phase=\"Running\"})",
|
||||
"pending": "sum(kube_pod_status_phase{phase=\"Pending\"})",
|
||||
"failed": "sum(kube_pod_status_phase{phase=\"Failed\"})",
|
||||
}
|
||||
trends: dict[str, dict[str, dict[str, float | None]]] = {}
|
||||
for window in _TREND_WINDOWS:
|
||||
window_entry: dict[str, dict[str, float | None]] = {}
|
||||
for name, expr in phases.items():
|
||||
window_entry[name] = {
|
||||
"avg": _vm_scalar_window(expr, window, "avg_over_time"),
|
||||
"max": _vm_scalar_window(expr, window, "max_over_time"),
|
||||
}
|
||||
trends[window] = window_entry
|
||||
return trends
|
||||
|
||||
|
||||
def _pvc_usage_trends() -> dict[str, list[dict[str, Any]]]:
|
||||
trends: dict[str, list[dict[str, Any]]] = {}
|
||||
expr = "kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes * 100"
|
||||
for window in _TREND_WINDOWS:
|
||||
entries = _vm_vector(
|
||||
f"topk({_TREND_PVC_LIMIT}, max_over_time(({expr})[{window}]))"
|
||||
)
|
||||
trends[window] = _pvc_top(entries)
|
||||
return trends
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
330
ariadne/services/cluster_state_vm_usage.py
Normal file
330
ariadne/services/cluster_state_vm_usage.py
Normal file
@ -0,0 +1,330 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_vm_client import *
|
||||
from .cluster_state_vm_trends import *
|
||||
|
||||
def _postgres_connections(errors: list[str]) -> dict[str, Any]:
|
||||
postgres: dict[str, Any] = {}
|
||||
try:
|
||||
postgres["used"] = _vm_scalar("sum(pg_stat_activity_count)")
|
||||
postgres["max"] = _vm_scalar("max(pg_settings_max_connections)")
|
||||
postgres["by_db"] = _vm_vector(
|
||||
"topk(5, sum by (datname) (pg_stat_activity_count))"
|
||||
)
|
||||
postgres["hottest_db"] = _vm_topk(
|
||||
"topk(1, sum by (datname) (pg_stat_activity_count))",
|
||||
"datname",
|
||||
)
|
||||
except Exception as exc:
|
||||
errors.append(f"postgres: {exc}")
|
||||
return postgres
|
||||
|
||||
|
||||
def _hottest_nodes(errors: list[str]) -> dict[str, Any]:
|
||||
hottest: dict[str, Any] = {}
|
||||
try:
|
||||
hottest["cpu"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) (((1 - avg by (instance) (rate(node_cpu_seconds_total{{mode="idle"}}[{_RATE_WINDOW}]))) * 100) '
|
||||
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["ram"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) ((avg by (instance) ((node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) '
|
||||
f'/ node_memory_MemTotal_bytes * 100)) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["net"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_network_receive_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]) '
|
||||
f'+ rate(node_network_transmit_bytes_total{{device!~"lo"}}[{_RATE_WINDOW}]))) * on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
hottest["io"] = _vm_topk(
|
||||
f'label_replace(topk(1, avg by (node) ((sum by (instance) (rate(node_disk_read_bytes_total[{_RATE_WINDOW}]) + rate(node_disk_written_bytes_total[{_RATE_WINDOW}]))) '
|
||||
f'* on(instance) group_left(node) label_replace({_NODE_UNAME_LABEL}, "node", "$1", "nodename", "(.*)"))), "__name__", "$1", "node", "(.*)")',
|
||||
"node",
|
||||
)
|
||||
except Exception as exc:
|
||||
errors.append(f"hottest: {exc}")
|
||||
return hottest
|
||||
|
||||
|
||||
def _node_usage(errors: list[str]) -> dict[str, Any]:
|
||||
usage: dict[str, Any] = {}
|
||||
try:
|
||||
exprs = _node_usage_exprs()
|
||||
usage["cpu"] = _vm_node_metric(exprs["cpu"], "node")
|
||||
usage["ram"] = _vm_node_metric(exprs["ram"], "node")
|
||||
usage["net"] = _vm_node_metric(exprs["net"], "node")
|
||||
usage["io"] = _vm_node_metric(exprs["io"], "node")
|
||||
usage["disk"] = _vm_node_metric(exprs["disk"], "node")
|
||||
except Exception as exc:
|
||||
errors.append(f"node_usage: {exc}")
|
||||
return usage
|
||||
|
||||
|
||||
def _pvc_usage(errors: list[str]) -> list[dict[str, Any]]:
|
||||
try:
|
||||
entries = _vm_vector(
|
||||
"topk(5, max by (namespace,persistentvolumeclaim) "
|
||||
"(kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes * 100))"
|
||||
)
|
||||
return _filter_namespace_vector(entries)
|
||||
except Exception as exc:
|
||||
errors.append(f"pvc_usage: {exc}")
|
||||
return []
|
||||
|
||||
|
||||
def _usage_stats(series: list[dict[str, Any]]) -> dict[str, float]:
|
||||
values: list[float] = []
|
||||
for entry in series:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
try:
|
||||
values.append(float(entry.get("value")))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
if not values:
|
||||
return {}
|
||||
return {
|
||||
"min": min(values),
|
||||
"max": max(values),
|
||||
"avg": sum(values) / len(values),
|
||||
}
|
||||
|
||||
|
||||
def _vm_namespace_totals(expr: str) -> dict[str, float]:
|
||||
totals: dict[str, float] = {}
|
||||
for item in _vm_vector(expr):
|
||||
metric = item.get("metric") if isinstance(item.get("metric"), dict) else {}
|
||||
namespace = metric.get("namespace")
|
||||
if not isinstance(namespace, str) or not namespace:
|
||||
continue
|
||||
try:
|
||||
totals[namespace] = float(item.get("value"))
|
||||
except (TypeError, ValueError):
|
||||
continue
|
||||
return totals
|
||||
|
||||
|
||||
def _build_namespace_capacity(
|
||||
cpu_usage: dict[str, float],
|
||||
cpu_requests: dict[str, float],
|
||||
mem_usage: dict[str, float],
|
||||
mem_requests: dict[str, float],
|
||||
) -> list[dict[str, Any]]:
|
||||
namespaces = sorted(set(cpu_usage) | set(cpu_requests) | set(mem_usage) | set(mem_requests))
|
||||
output: list[dict[str, Any]] = []
|
||||
for namespace in namespaces:
|
||||
cpu_used = cpu_usage.get(namespace)
|
||||
cpu_req = cpu_requests.get(namespace)
|
||||
mem_used = mem_usage.get(namespace)
|
||||
mem_req = mem_requests.get(namespace)
|
||||
cpu_ratio = None
|
||||
mem_ratio = None
|
||||
if isinstance(cpu_used, (int, float)) and isinstance(cpu_req, (int, float)) and cpu_req > 0:
|
||||
cpu_ratio = cpu_used / cpu_req
|
||||
if isinstance(mem_used, (int, float)) and isinstance(mem_req, (int, float)) and mem_req > 0:
|
||||
mem_ratio = mem_used / mem_req
|
||||
output.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"cpu_usage": cpu_used,
|
||||
"cpu_requests": cpu_req,
|
||||
"cpu_usage_ratio": cpu_ratio,
|
||||
"mem_usage": mem_used,
|
||||
"mem_requests": mem_req,
|
||||
"mem_usage_ratio": mem_ratio,
|
||||
}
|
||||
)
|
||||
output.sort(
|
||||
key=lambda item: (
|
||||
-(item.get("cpu_requests") or 0),
|
||||
-(item.get("mem_requests") or 0),
|
||||
item.get("namespace") or "",
|
||||
)
|
||||
)
|
||||
return output
|
||||
|
||||
|
||||
def _node_usage_profile(
|
||||
node_usage: dict[str, list[dict[str, Any]]],
|
||||
node_details: list[dict[str, Any]],
|
||||
node_pods: list[dict[str, Any]],
|
||||
) -> list[dict[str, Any]]:
|
||||
usage: dict[str, dict[str, Any]] = {}
|
||||
for key in ("cpu", "ram", "disk", "net", "io"):
|
||||
for item in node_usage.get(key, []) or []:
|
||||
node = item.get("node")
|
||||
value = item.get("value")
|
||||
if not isinstance(node, str) or not node:
|
||||
continue
|
||||
if not isinstance(value, (int, float)):
|
||||
continue
|
||||
usage.setdefault(node, {})[key] = float(value)
|
||||
max_values: dict[str, float] = {}
|
||||
for key in ("cpu", "ram", "disk", "net", "io"):
|
||||
values = [entry.get(key) for entry in usage.values() if isinstance(entry.get(key), (int, float))]
|
||||
max_values[key] = max(values) if values else 0.0
|
||||
|
||||
detail_map: dict[str, dict[str, Any]] = {
|
||||
entry.get("name"): entry for entry in node_details if isinstance(entry, dict)
|
||||
}
|
||||
pod_map: dict[str, dict[str, Any]] = {
|
||||
entry.get("node"): entry for entry in node_pods if isinstance(entry, dict)
|
||||
}
|
||||
|
||||
output: list[dict[str, Any]] = []
|
||||
for node, entry in usage.items():
|
||||
detail = detail_map.get(node, {})
|
||||
pressure = detail.get("pressure") if isinstance(detail.get("pressure"), dict) else {}
|
||||
pressure_count = sum(1 for value in pressure.values() if value)
|
||||
taints = detail.get("taints") if isinstance(detail.get("taints"), list) else []
|
||||
unschedulable = bool(detail.get("unschedulable"))
|
||||
pods_total = None
|
||||
pod_entry = pod_map.get(node)
|
||||
if isinstance(pod_entry, dict):
|
||||
pods_total = pod_entry.get("pods_total")
|
||||
|
||||
normalized: dict[str, float] = {}
|
||||
for key in ("cpu", "ram", "disk", "net", "io"):
|
||||
raw = entry.get(key)
|
||||
max_val = max_values.get(key) or 0.0
|
||||
if isinstance(raw, (int, float)) and max_val > 0:
|
||||
normalized[f"{key}_norm"] = raw / max_val
|
||||
norm_values = [v for v in normalized.values() if isinstance(v, (int, float))]
|
||||
load_index = sum(norm_values) / len(norm_values) if norm_values else None
|
||||
output.append(
|
||||
{
|
||||
"node": node,
|
||||
"cpu": entry.get("cpu"),
|
||||
"ram": entry.get("ram"),
|
||||
"disk": entry.get("disk"),
|
||||
"net": entry.get("net"),
|
||||
"io": entry.get("io"),
|
||||
**normalized,
|
||||
"pressure_flags": pressure,
|
||||
"pressure_count": pressure_count,
|
||||
"taints": taints,
|
||||
"unschedulable": unschedulable,
|
||||
"pods_total": pods_total,
|
||||
"load_index": load_index,
|
||||
}
|
||||
)
|
||||
output.sort(key=lambda item: (-(item.get("load_index") or 0), item.get("node") or ""))
|
||||
return output
|
||||
|
||||
|
||||
def _percentile(values: list[float], percentile: float) -> float | None:
|
||||
if not values:
|
||||
return None
|
||||
ordered = sorted(values)
|
||||
idx = int(round((len(ordered) - 1) * percentile))
|
||||
idx = min(max(idx, 0), len(ordered) - 1)
|
||||
return ordered[idx]
|
||||
|
||||
|
||||
def _node_load_summary(node_load: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
items = [
|
||||
entry
|
||||
for entry in node_load
|
||||
if isinstance(entry, dict) and isinstance(entry.get("load_index"), (int, float))
|
||||
]
|
||||
if not items:
|
||||
return {}
|
||||
values = [float(entry.get("load_index") or 0) for entry in items]
|
||||
avg = sum(values) / len(values)
|
||||
variance = sum((value - avg) ** 2 for value in values) / len(values)
|
||||
stddev = variance**0.5
|
||||
top = sorted(items, key=lambda item: -(item.get("load_index") or 0))[:_LOAD_TOP_COUNT]
|
||||
bottom = sorted(items, key=lambda item: (item.get("load_index") or 0))[:_LOAD_TOP_COUNT]
|
||||
outliers = [
|
||||
item
|
||||
for item in items
|
||||
if isinstance(item.get("load_index"), (int, float))
|
||||
and item.get("load_index") >= avg + stddev
|
||||
]
|
||||
outliers.sort(key=lambda item: -(item.get("load_index") or 0))
|
||||
return {
|
||||
"avg": round(avg, 3),
|
||||
"p90": round(_percentile(values, 0.9) or 0.0, 3),
|
||||
"min": round(min(values), 3),
|
||||
"max": round(max(values), 3),
|
||||
"top": top,
|
||||
"bottom": bottom,
|
||||
"outliers": outliers[:_LOAD_TOP_COUNT],
|
||||
}
|
||||
|
||||
|
||||
def _namespace_capacity_summary(capacity: list[dict[str, Any]]) -> dict[str, Any]:
|
||||
if not capacity:
|
||||
return {}
|
||||
cpu_ratio = [
|
||||
entry
|
||||
for entry in capacity
|
||||
if isinstance(entry, dict) and isinstance(entry.get("cpu_usage_ratio"), (int, float))
|
||||
]
|
||||
mem_ratio = [
|
||||
entry
|
||||
for entry in capacity
|
||||
if isinstance(entry, dict) and isinstance(entry.get("mem_usage_ratio"), (int, float))
|
||||
]
|
||||
cpu_ratio.sort(key=lambda item: -(item.get("cpu_usage_ratio") or 0))
|
||||
mem_ratio.sort(key=lambda item: -(item.get("mem_usage_ratio") or 0))
|
||||
cpu_headroom: list[dict[str, Any]] = []
|
||||
mem_headroom: list[dict[str, Any]] = []
|
||||
for entry in capacity:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
cpu_used = entry.get("cpu_usage")
|
||||
cpu_req = entry.get("cpu_requests")
|
||||
mem_used = entry.get("mem_usage")
|
||||
mem_req = entry.get("mem_requests")
|
||||
if isinstance(cpu_used, (int, float)) and isinstance(cpu_req, (int, float)):
|
||||
cpu_headroom.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"headroom": cpu_req - cpu_used,
|
||||
"usage": cpu_used,
|
||||
"requests": cpu_req,
|
||||
"ratio": entry.get("cpu_usage_ratio"),
|
||||
}
|
||||
)
|
||||
if isinstance(mem_used, (int, float)) and isinstance(mem_req, (int, float)):
|
||||
mem_headroom.append(
|
||||
{
|
||||
"namespace": entry.get("namespace"),
|
||||
"headroom": mem_req - mem_used,
|
||||
"usage": mem_used,
|
||||
"requests": mem_req,
|
||||
"ratio": entry.get("mem_usage_ratio"),
|
||||
}
|
||||
)
|
||||
cpu_headroom.sort(key=lambda item: (item.get("headroom") or 0))
|
||||
mem_headroom.sort(key=lambda item: (item.get("headroom") or 0))
|
||||
cpu_over_names = [
|
||||
entry.get("namespace")
|
||||
for entry in cpu_ratio
|
||||
if (entry.get("cpu_usage_ratio") or 0) > 1 and entry.get("namespace")
|
||||
]
|
||||
mem_over_names = [
|
||||
entry.get("namespace")
|
||||
for entry in mem_ratio
|
||||
if (entry.get("mem_usage_ratio") or 0) > 1 and entry.get("namespace")
|
||||
]
|
||||
over_cpu = len(cpu_over_names)
|
||||
over_mem = len(mem_over_names)
|
||||
return {
|
||||
"cpu_ratio_top": cpu_ratio[:_NAMESPACE_TOP_COUNT],
|
||||
"mem_ratio_top": mem_ratio[:_NAMESPACE_TOP_COUNT],
|
||||
"cpu_headroom_low": cpu_headroom[:_NAMESPACE_TOP_COUNT],
|
||||
"mem_headroom_low": mem_headroom[:_NAMESPACE_TOP_COUNT],
|
||||
"cpu_overcommitted": over_cpu,
|
||||
"mem_overcommitted": over_mem,
|
||||
"cpu_overcommitted_names": sorted({name for name in cpu_over_names if isinstance(name, str)}),
|
||||
"mem_overcommitted_names": sorted({name for name in mem_over_names if isinstance(name, str)}),
|
||||
}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
249
ariadne/services/cluster_state_workloads.py
Normal file
249
ariadne/services/cluster_state_workloads.py
Normal file
@ -0,0 +1,249 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .cluster_state_contract import *
|
||||
from .cluster_state_nodes import *
|
||||
|
||||
def _summarize_jobs(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
totals = {"total": 0, "active": 0, "failed": 0, "succeeded": 0}
|
||||
by_namespace: dict[str, dict[str, int]] = {}
|
||||
failing: list[dict[str, Any]] = []
|
||||
active_oldest: list[dict[str, Any]] = []
|
||||
for job in _items(payload):
|
||||
metadata = job.get("metadata") if isinstance(job.get("metadata"), dict) else {}
|
||||
status = job.get("status") if isinstance(job.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
created_at = (
|
||||
metadata.get("creationTimestamp")
|
||||
if isinstance(metadata.get("creationTimestamp"), str)
|
||||
else ""
|
||||
)
|
||||
if not name or not namespace:
|
||||
continue
|
||||
active = int(status.get("active") or 0)
|
||||
failed = int(status.get("failed") or 0)
|
||||
succeeded = int(status.get("succeeded") or 0)
|
||||
totals["total"] += 1
|
||||
totals["active"] += active
|
||||
totals["failed"] += failed
|
||||
totals["succeeded"] += succeeded
|
||||
entry = by_namespace.setdefault(namespace, {"active": 0, "failed": 0, "succeeded": 0})
|
||||
entry["active"] += active
|
||||
entry["failed"] += failed
|
||||
entry["succeeded"] += succeeded
|
||||
age_hours = _age_hours(created_at)
|
||||
if failed > 0:
|
||||
failing.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"job": name,
|
||||
"failed": failed,
|
||||
"age_hours": age_hours,
|
||||
}
|
||||
)
|
||||
if active > 0 and age_hours is not None:
|
||||
active_oldest.append(
|
||||
{
|
||||
"namespace": namespace,
|
||||
"job": name,
|
||||
"active": active,
|
||||
"age_hours": age_hours,
|
||||
}
|
||||
)
|
||||
failing.sort(
|
||||
key=lambda item: (
|
||||
-(item.get("failed") or 0),
|
||||
-(item.get("age_hours") or 0.0),
|
||||
item.get("namespace") or "",
|
||||
item.get("job") or "",
|
||||
)
|
||||
)
|
||||
active_oldest.sort(key=lambda item: -(item.get("age_hours") or 0.0))
|
||||
namespace_summary = [
|
||||
{
|
||||
"namespace": ns,
|
||||
"active": stats.get("active", 0),
|
||||
"failed": stats.get("failed", 0),
|
||||
"succeeded": stats.get("succeeded", 0),
|
||||
}
|
||||
for ns, stats in by_namespace.items()
|
||||
]
|
||||
namespace_summary.sort(
|
||||
key=lambda item: (
|
||||
-(item.get("active") or 0),
|
||||
-(item.get("failed") or 0),
|
||||
item.get("namespace") or "",
|
||||
)
|
||||
)
|
||||
return {
|
||||
"totals": totals,
|
||||
"by_namespace": namespace_summary[:20],
|
||||
"failing": failing[:20],
|
||||
"active_oldest": active_oldest[:20],
|
||||
}
|
||||
|
||||
|
||||
def _summarize_deployments(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
items = _items(payload)
|
||||
unhealthy: list[dict[str, Any]] = []
|
||||
for dep in items:
|
||||
metadata = dep.get("metadata") if isinstance(dep.get("metadata"), dict) else {}
|
||||
spec = dep.get("spec") if isinstance(dep.get("spec"), dict) else {}
|
||||
status = dep.get("status") if isinstance(dep.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
desired = int(spec.get("replicas") or 0)
|
||||
ready = int(status.get("readyReplicas") or 0)
|
||||
available = int(status.get("availableReplicas") or 0)
|
||||
updated = int(status.get("updatedReplicas") or 0)
|
||||
if desired <= 0:
|
||||
continue
|
||||
if ready < desired or available < desired:
|
||||
unhealthy.append(
|
||||
{
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
"desired": desired,
|
||||
"ready": ready,
|
||||
"available": available,
|
||||
"updated": updated,
|
||||
}
|
||||
)
|
||||
unhealthy.sort(key=lambda item: (item.get("namespace") or "", item.get("name") or ""))
|
||||
return {
|
||||
"total": len(items),
|
||||
"not_ready": len(unhealthy),
|
||||
"items": unhealthy,
|
||||
}
|
||||
|
||||
|
||||
def _summarize_statefulsets(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
items = _items(payload)
|
||||
unhealthy: list[dict[str, Any]] = []
|
||||
for st in items:
|
||||
metadata = st.get("metadata") if isinstance(st.get("metadata"), dict) else {}
|
||||
spec = st.get("spec") if isinstance(st.get("spec"), dict) else {}
|
||||
status = st.get("status") if isinstance(st.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
desired = int(spec.get("replicas") or 0)
|
||||
ready = int(status.get("readyReplicas") or 0)
|
||||
current = int(status.get("currentReplicas") or 0)
|
||||
updated = int(status.get("updatedReplicas") or 0)
|
||||
if desired <= 0:
|
||||
continue
|
||||
if ready < desired:
|
||||
unhealthy.append(
|
||||
{
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
"desired": desired,
|
||||
"ready": ready,
|
||||
"current": current,
|
||||
"updated": updated,
|
||||
}
|
||||
)
|
||||
unhealthy.sort(key=lambda item: (item.get("namespace") or "", item.get("name") or ""))
|
||||
return {
|
||||
"total": len(items),
|
||||
"not_ready": len(unhealthy),
|
||||
"items": unhealthy,
|
||||
}
|
||||
|
||||
|
||||
def _summarize_daemonsets(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
items = _items(payload)
|
||||
unhealthy: list[dict[str, Any]] = []
|
||||
for ds in items:
|
||||
metadata = ds.get("metadata") if isinstance(ds.get("metadata"), dict) else {}
|
||||
status = ds.get("status") if isinstance(ds.get("status"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
namespace = metadata.get("namespace") if isinstance(metadata.get("namespace"), str) else ""
|
||||
desired = int(status.get("desiredNumberScheduled") or 0)
|
||||
ready = int(status.get("numberReady") or 0)
|
||||
updated = int(status.get("updatedNumberScheduled") or 0)
|
||||
if desired <= 0:
|
||||
continue
|
||||
if ready < desired:
|
||||
unhealthy.append(
|
||||
{
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
"desired": desired,
|
||||
"ready": ready,
|
||||
"updated": updated,
|
||||
}
|
||||
)
|
||||
unhealthy.sort(key=lambda item: (item.get("namespace") or "", item.get("name") or ""))
|
||||
return {
|
||||
"total": len(items),
|
||||
"not_ready": len(unhealthy),
|
||||
"items": unhealthy,
|
||||
}
|
||||
|
||||
|
||||
def _summarize_workload_health(
|
||||
deployments: dict[str, Any],
|
||||
statefulsets: dict[str, Any],
|
||||
daemonsets: dict[str, Any],
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"deployments": deployments,
|
||||
"statefulsets": statefulsets,
|
||||
"daemonsets": daemonsets,
|
||||
}
|
||||
|
||||
def _summarize_longhorn_volumes(payload: dict[str, Any]) -> dict[str, Any]:
|
||||
items = _items(payload)
|
||||
if not items:
|
||||
return {}
|
||||
by_state: dict[str, int] = {}
|
||||
by_robustness: dict[str, int] = {}
|
||||
degraded: list[dict[str, Any]] = []
|
||||
attached_count = 0
|
||||
detached_count = 0
|
||||
degraded_count = 0
|
||||
for volume in items:
|
||||
metadata = volume.get("metadata") if isinstance(volume.get("metadata"), dict) else {}
|
||||
status = volume.get("status") if isinstance(volume.get("status"), dict) else {}
|
||||
spec = volume.get("spec") if isinstance(volume.get("spec"), dict) else {}
|
||||
name = metadata.get("name") if isinstance(metadata.get("name"), str) else ""
|
||||
if not name:
|
||||
continue
|
||||
state = status.get("state") if isinstance(status.get("state"), str) else "unknown"
|
||||
robustness = (
|
||||
status.get("robustness") if isinstance(status.get("robustness"), str) else "unknown"
|
||||
)
|
||||
state_lower = state.lower()
|
||||
robustness_lower = robustness.lower()
|
||||
by_state[state] = by_state.get(state, 0) + 1
|
||||
by_robustness[robustness] = by_robustness.get(robustness, 0) + 1
|
||||
if state_lower == "attached":
|
||||
attached_count += 1
|
||||
elif state_lower == "detached":
|
||||
detached_count += 1
|
||||
if robustness_lower in {"degraded", "faulted"}:
|
||||
degraded_count += 1
|
||||
degraded.append(
|
||||
{
|
||||
"name": name,
|
||||
"state": state,
|
||||
"robustness": robustness,
|
||||
"size": spec.get("size"),
|
||||
"actual_size": status.get("actualSize"),
|
||||
}
|
||||
)
|
||||
degraded.sort(key=lambda item: item.get("name") or "")
|
||||
return {
|
||||
"total": len(items),
|
||||
"by_state": by_state,
|
||||
"by_robustness": by_robustness,
|
||||
"attached_count": attached_count,
|
||||
"detached_count": detached_count,
|
||||
"degraded": degraded,
|
||||
"degraded_count": degraded_count,
|
||||
}
|
||||
|
||||
__all__ = [name for name in globals() if (name.startswith("_") and not name.startswith("__")) or name in {"ClusterStateSummary", "SignalContext"}]
|
||||
@ -1,93 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import base64
|
||||
import time
|
||||
import urllib.parse
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
import psycopg
|
||||
|
||||
from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.name_generator import NameGenerator
|
||||
from .comms_guest_names import _CommsGuestNameMixin
|
||||
from .comms_protocol import _canon_user
|
||||
from .comms_room_ops import _CommsRoomOpsMixin
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
class CommsService(_CommsGuestNameMixin, _CommsRoomOpsMixin):
|
||||
"""Maintain Matrix/MAS guest naming and room hygiene.
|
||||
|
||||
HTTP_OK = 200
|
||||
HTTP_CREATED = 201
|
||||
HTTP_ACCEPTED = 202
|
||||
HTTP_NO_CONTENT = 204
|
||||
HTTP_BAD_REQUEST = 400
|
||||
HTTP_NOT_FOUND = 404
|
||||
HTTP_CONFLICT = 409
|
||||
Inputs: Matrix/MAS endpoints, service credentials, and optional database access
|
||||
from settings. Outputs: scheduled maintenance actions plus small status dicts
|
||||
for scheduler logging.
|
||||
"""
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CommsSummary:
|
||||
processed: int
|
||||
renamed: int
|
||||
pruned: int
|
||||
skipped: int
|
||||
detail: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MasGuestResult:
|
||||
renamed: int
|
||||
skipped: int
|
||||
usernames: set[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SynapseGuestResult:
|
||||
renamed: int
|
||||
pruned: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DisplayNameTarget:
|
||||
room_id: str
|
||||
user_id: str
|
||||
name: str
|
||||
in_room: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SynapseUserRef:
|
||||
entry: dict[str, Any]
|
||||
user_id: str
|
||||
localpart: str
|
||||
|
||||
|
||||
def _auth(token: str) -> dict[str, str]:
|
||||
return {"Authorization": f"Bearer {token}"}
|
||||
|
||||
|
||||
def _canon_user(user: str, server_name: str) -> str:
|
||||
user = (user or "").strip()
|
||||
if user.startswith("@") and ":" in user:
|
||||
return user
|
||||
user = user.lstrip("@")
|
||||
if ":" in user:
|
||||
return f"@{user}"
|
||||
return f"@{user}:{server_name}"
|
||||
|
||||
|
||||
def _needs_rename_username(username: str) -> bool:
|
||||
return username.isdigit() or username.startswith("guest-")
|
||||
|
||||
|
||||
def _needs_rename_display(display: str | None) -> bool:
|
||||
if not display:
|
||||
return True
|
||||
return display.isdigit() or display.startswith("guest-")
|
||||
|
||||
|
||||
|
||||
|
||||
class CommsService:
|
||||
def __init__(
|
||||
self,
|
||||
client_factory: type[httpx.Client] = httpx.Client,
|
||||
@ -96,6 +29,10 @@ class CommsService:
|
||||
self._client_factory = client_factory
|
||||
self._name_generator = name_generator or NameGenerator()
|
||||
|
||||
@property
|
||||
def _settings(self) -> Any:
|
||||
return settings
|
||||
|
||||
def _pick_guest_name(self, existing: set[str]) -> str | None:
|
||||
return self._name_generator.unique(existing)
|
||||
|
||||
@ -106,838 +43,22 @@ class CommsService:
|
||||
token = getattr(settings, "comms_synapse_admin_token", "")
|
||||
return token if token else fallback
|
||||
|
||||
def _mas_admin_token(self, client: httpx.Client) -> str:
|
||||
if not settings.comms_mas_admin_client_id or not settings.comms_mas_admin_client_secret:
|
||||
raise RuntimeError("mas admin client credentials missing")
|
||||
basic = base64.b64encode(
|
||||
f"{settings.comms_mas_admin_client_id}:{settings.comms_mas_admin_client_secret}".encode()
|
||||
).decode()
|
||||
last_err: Exception | None = None
|
||||
for attempt in range(5):
|
||||
try:
|
||||
resp = client.post(
|
||||
settings.comms_mas_token_url,
|
||||
headers={"Authorization": f"Basic {basic}"},
|
||||
data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
token = payload.get("access_token")
|
||||
if not isinstance(token, str) or not token:
|
||||
raise RuntimeError("missing mas access token")
|
||||
return token
|
||||
except Exception as exc: # noqa: BLE001
|
||||
last_err = exc
|
||||
time.sleep(2**attempt)
|
||||
raise RuntimeError(str(last_err) if last_err else "mas admin token failed")
|
||||
|
||||
def _mas_user_id(self, client: httpx.Client, token: str, username: str) -> str:
|
||||
url = f"{settings.comms_mas_admin_api_base}/users/by-username/{urllib.parse.quote(username)}"
|
||||
resp = client.get(url, headers=_auth(token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
return payload["data"]["id"]
|
||||
|
||||
def _mas_personal_session(self, client: httpx.Client, token: str, user_id: str) -> tuple[str, str]:
|
||||
resp = client.post(
|
||||
f"{settings.comms_mas_admin_api_base}/personal-sessions",
|
||||
headers=_auth(token),
|
||||
json={
|
||||
"actor_user_id": user_id,
|
||||
"human_name": "guest-name-randomizer",
|
||||
"scope": "urn:matrix:client:api:*",
|
||||
"expires_in": 300,
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json().get("data", {})
|
||||
session_id = payload.get("id")
|
||||
attrs = (payload.get("attributes") or {}) if isinstance(payload, dict) else {}
|
||||
access_token = attrs.get("access_token")
|
||||
if not isinstance(access_token, str) or not isinstance(session_id, str):
|
||||
raise RuntimeError("invalid personal session response")
|
||||
return access_token, session_id
|
||||
|
||||
def _mas_revoke_session(self, client: httpx.Client, token: str, session_id: str) -> None:
|
||||
try:
|
||||
client.post(
|
||||
f"{settings.comms_mas_admin_api_base}/personal-sessions/{urllib.parse.quote(session_id)}/revoke",
|
||||
headers=_auth(token),
|
||||
json={},
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def _resolve_alias(self, client: httpx.Client, token: str, alias: str) -> str:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
return payload["room_id"]
|
||||
|
||||
def _room_members(self, client: httpx.Client, token: str, room_id: str) -> tuple[set[str], set[str]]:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
members: set[str] = set()
|
||||
existing: set[str] = set()
|
||||
for ev in payload.get("chunk", []) or []:
|
||||
user_id = ev.get("state_key")
|
||||
if isinstance(user_id, str) and user_id:
|
||||
members.add(user_id)
|
||||
display = (ev.get("content") or {}).get("displayname")
|
||||
if isinstance(display, str) and display:
|
||||
existing.add(display)
|
||||
return members, existing
|
||||
|
||||
def _mas_list_users(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
users: list[dict[str, Any]] = []
|
||||
cursor = None
|
||||
while True:
|
||||
url = f"{settings.comms_mas_admin_api_base}/users?page[size]=100"
|
||||
if cursor:
|
||||
url += f"&page[after]={urllib.parse.quote(cursor)}"
|
||||
resp = client.get(url, headers=_auth(token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
data = payload.get("data") or []
|
||||
if not isinstance(data, list) or not data:
|
||||
break
|
||||
users.extend([item for item in data if isinstance(item, dict)])
|
||||
last = data[-1]
|
||||
cursor = (
|
||||
last.get("meta", {})
|
||||
if isinstance(last, dict)
|
||||
else {}
|
||||
).get("page", {}).get("cursor")
|
||||
if not cursor:
|
||||
break
|
||||
return users
|
||||
|
||||
def _synapse_list_users(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
users: list[dict[str, Any]] = []
|
||||
from_token = None
|
||||
admin_token = self._admin_token(token)
|
||||
while True:
|
||||
url = "{}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100".format(
|
||||
settings.comms_synapse_base
|
||||
)
|
||||
if from_token:
|
||||
url += f"&from={urllib.parse.quote(from_token)}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
users.extend([item for item in payload.get("users", []) if isinstance(item, dict)])
|
||||
from_token = payload.get("next_token")
|
||||
if not from_token:
|
||||
break
|
||||
return users
|
||||
|
||||
def _should_prune_guest(self, entry: dict[str, Any], now_ms: int) -> bool:
|
||||
if not entry.get("is_guest"):
|
||||
return False
|
||||
last_seen = entry.get("last_seen_ts")
|
||||
if last_seen is None:
|
||||
return False
|
||||
try:
|
||||
last_seen = int(last_seen)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
stale_ms = int(settings.comms_guest_stale_days) * 24 * 60 * 60 * 1000
|
||||
return now_ms - last_seen > stale_ms
|
||||
|
||||
def _prune_guest(self, client: httpx.Client, token: str, user_id: str) -> bool:
|
||||
admin_token = self._admin_token(token)
|
||||
try:
|
||||
resp = client.delete(
|
||||
f"{settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
params={"erase": "true"},
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.info(
|
||||
"guest prune failed",
|
||||
extra={"event": "comms_guest_prune", "status": "error", "detail": str(exc)},
|
||||
)
|
||||
return False
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED, HTTP_NO_CONTENT, HTTP_NOT_FOUND):
|
||||
return True
|
||||
logger.info(
|
||||
"guest prune failed",
|
||||
extra={
|
||||
"event": "comms_guest_prune",
|
||||
"status": "error",
|
||||
"detail": f"{resp.status_code} {resp.text}",
|
||||
},
|
||||
)
|
||||
return False
|
||||
|
||||
def _get_displayname(self, client: httpx.Client, token: str, user_id: str) -> str | None:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("displayname")
|
||||
|
||||
def _get_displayname_admin(self, client: httpx.Client, token: str, user_id: str) -> str | None:
|
||||
admin_token = self._admin_token(token)
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("displayname")
|
||||
|
||||
def _set_displayname(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
token: str,
|
||||
target: DisplayNameTarget,
|
||||
) -> None:
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/profile/{urllib.parse.quote(target.user_id)}/displayname",
|
||||
headers=_auth(token),
|
||||
json={"displayname": target.name},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
if not target.in_room:
|
||||
return
|
||||
state_url = (
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(target.room_id)}"
|
||||
f"/state/m.room.member/{urllib.parse.quote(target.user_id)}"
|
||||
)
|
||||
client.put(
|
||||
state_url,
|
||||
headers=_auth(token),
|
||||
json={"membership": "join", "displayname": target.name},
|
||||
)
|
||||
|
||||
def _set_displayname_admin(self, client: httpx.Client, token: str, user_id: str, name: str) -> bool:
|
||||
admin_token = self._admin_token(token)
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
json={"displayname": name},
|
||||
)
|
||||
return resp.status_code in (HTTP_OK, HTTP_CREATED, HTTP_NO_CONTENT)
|
||||
|
||||
def _db_rename_numeric(self, existing: set[str]) -> int:
|
||||
if not settings.comms_synapse_db_password:
|
||||
return 0
|
||||
renamed = 0
|
||||
conn = psycopg.connect(
|
||||
def _connect_synapse_db(self) -> Any:
|
||||
return psycopg.connect(
|
||||
host=settings.comms_synapse_db_host,
|
||||
port=settings.comms_synapse_db_port,
|
||||
dbname=settings.comms_synapse_db_name,
|
||||
user=settings.comms_synapse_db_user,
|
||||
password=settings.comms_synapse_db_password,
|
||||
)
|
||||
try:
|
||||
with conn:
|
||||
with conn.cursor() as cur:
|
||||
pattern = f"^@\\d+:{settings.comms_server_name}$"
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
|
||||
(pattern,),
|
||||
)
|
||||
profile_rows = cur.fetchall()
|
||||
profile_index = {row[1]: row for row in profile_rows}
|
||||
for _user_id, full_user_id, display in profile_rows:
|
||||
if display and not _needs_rename_display(display):
|
||||
continue
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
continue
|
||||
cur.execute(
|
||||
"UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
|
||||
(new_name, full_user_id),
|
||||
)
|
||||
renamed += 1
|
||||
|
||||
cur.execute(
|
||||
"SELECT name FROM users WHERE name ~ %s",
|
||||
(pattern,),
|
||||
)
|
||||
users = [row[0] for row in cur.fetchall()]
|
||||
if not users:
|
||||
return renamed
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
|
||||
(users,),
|
||||
)
|
||||
for existing_full in cur.fetchall():
|
||||
profile_index.setdefault(existing_full[1], existing_full)
|
||||
def _sleep(self, seconds: float) -> None:
|
||||
time.sleep(seconds)
|
||||
|
||||
for full_user_id in users:
|
||||
if full_user_id in profile_index:
|
||||
continue
|
||||
localpart = full_user_id.split(":", 1)[0].lstrip("@")
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
continue
|
||||
cur.execute(
|
||||
"INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
|
||||
"ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
|
||||
(localpart, new_name, full_user_id),
|
||||
)
|
||||
renamed += 1
|
||||
finally:
|
||||
conn.close()
|
||||
return renamed
|
||||
|
||||
def _validate_guest_name_settings(self) -> None:
|
||||
if not settings.comms_mas_admin_client_id or not settings.comms_mas_admin_client_secret:
|
||||
raise RuntimeError("comms mas admin secret missing")
|
||||
if not settings.comms_synapse_base:
|
||||
raise RuntimeError("comms synapse base missing")
|
||||
|
||||
def _room_context(self, client: httpx.Client, token: str) -> tuple[str, set[str], set[str]]:
|
||||
room_id = self._resolve_alias(client, token, settings.comms_room_alias)
|
||||
members, existing = self._room_members(client, token, room_id)
|
||||
return room_id, members, existing
|
||||
|
||||
def _rename_mas_guests(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
admin_token: str,
|
||||
room_id: str,
|
||||
members: set[str],
|
||||
existing: set[str],
|
||||
) -> MasGuestResult:
|
||||
renamed = 0
|
||||
skipped = 0
|
||||
mas_usernames: set[str] = set()
|
||||
users = self._mas_list_users(client, admin_token)
|
||||
for user in users:
|
||||
attrs = user.get("attributes") or {}
|
||||
username = attrs.get("username") or ""
|
||||
if isinstance(username, str) and username:
|
||||
mas_usernames.add(username)
|
||||
legacy_guest = attrs.get("legacy_guest")
|
||||
if not isinstance(username, str) or not username:
|
||||
skipped += 1
|
||||
continue
|
||||
if not (legacy_guest or _needs_rename_username(username)):
|
||||
skipped += 1
|
||||
continue
|
||||
user_id = user.get("id")
|
||||
if not isinstance(user_id, str) or not user_id:
|
||||
skipped += 1
|
||||
continue
|
||||
full_user = f"@{username}:{settings.comms_server_name}"
|
||||
access_token, session_id = self._mas_personal_session(client, admin_token, user_id)
|
||||
try:
|
||||
display = self._get_displayname(client, access_token, full_user)
|
||||
if display and not _needs_rename_display(display):
|
||||
skipped += 1
|
||||
continue
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
skipped += 1
|
||||
continue
|
||||
self._set_displayname(
|
||||
client,
|
||||
access_token,
|
||||
DisplayNameTarget(
|
||||
room_id=room_id,
|
||||
user_id=full_user,
|
||||
name=new_name,
|
||||
in_room=full_user in members,
|
||||
),
|
||||
)
|
||||
renamed += 1
|
||||
finally:
|
||||
self._mas_revoke_session(client, admin_token, session_id)
|
||||
return MasGuestResult(renamed=renamed, skipped=skipped, usernames=mas_usernames)
|
||||
|
||||
def _synapse_entries(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
try:
|
||||
return self._synapse_list_users(client, token)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.info(
|
||||
"synapse admin list skipped",
|
||||
extra={"event": "comms_guest_list", "status": "error", "detail": str(exc)},
|
||||
)
|
||||
return []
|
||||
|
||||
def _synapse_user_id(self, entry: dict[str, Any]) -> SynapseUserRef | None:
|
||||
user_id = entry.get("name") or ""
|
||||
if not isinstance(user_id, str) or not user_id.startswith("@"):
|
||||
return None
|
||||
localpart = user_id.split(":", 1)[0].lstrip("@")
|
||||
return SynapseUserRef(entry=entry, user_id=user_id, localpart=localpart)
|
||||
|
||||
def _maybe_prune_synapse_guest(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
token: str,
|
||||
entry: dict[str, Any],
|
||||
user_id: str,
|
||||
now_ms: int,
|
||||
) -> bool:
|
||||
if not entry.get("is_guest"):
|
||||
return False
|
||||
if not self._should_prune_guest(entry, now_ms):
|
||||
return False
|
||||
return self._prune_guest(client, token, user_id)
|
||||
|
||||
def _needs_synapse_rename(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
token: str,
|
||||
user: SynapseUserRef,
|
||||
mas_usernames: set[str],
|
||||
) -> bool:
|
||||
if user.localpart in mas_usernames:
|
||||
return False
|
||||
is_guest = user.entry.get("is_guest")
|
||||
if not (is_guest or _needs_rename_username(user.localpart)):
|
||||
return False
|
||||
display = self._get_displayname_admin(client, token, user.user_id)
|
||||
if display and not _needs_rename_display(display):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _rename_synapse_user(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
token: str,
|
||||
existing: set[str],
|
||||
user_id: str,
|
||||
) -> bool:
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
return False
|
||||
return self._set_displayname_admin(client, token, user_id, new_name)
|
||||
|
||||
def _rename_synapse_guests(
|
||||
self,
|
||||
client: httpx.Client,
|
||||
token: str,
|
||||
existing: set[str],
|
||||
mas_usernames: set[str],
|
||||
) -> SynapseGuestResult:
|
||||
renamed = 0
|
||||
pruned = 0
|
||||
entries = self._synapse_entries(client, token)
|
||||
|
||||
now_ms = int(time.time() * 1000)
|
||||
for entry in entries:
|
||||
user_ref = self._synapse_user_id(entry)
|
||||
if not user_ref:
|
||||
continue
|
||||
if self._maybe_prune_synapse_guest(client, token, user_ref.entry, user_ref.user_id, now_ms):
|
||||
pruned += 1
|
||||
continue
|
||||
if not self._needs_synapse_rename(client, token, user_ref, mas_usernames):
|
||||
continue
|
||||
if self._rename_synapse_user(client, token, existing, user_ref.user_id):
|
||||
renamed += 1
|
||||
return SynapseGuestResult(renamed=renamed, pruned=pruned)
|
||||
|
||||
def run_guest_name_randomizer(self, wait: bool = True) -> dict[str, Any]:
|
||||
self._validate_guest_name_settings()
|
||||
|
||||
with self._client() as client:
|
||||
admin_token = self._mas_admin_token(client)
|
||||
seeder_id = self._mas_user_id(client, admin_token, settings.comms_seeder_user)
|
||||
seeder_token, seeder_session = self._mas_personal_session(client, admin_token, seeder_id)
|
||||
try:
|
||||
room_id, members, existing = self._room_context(client, seeder_token)
|
||||
mas_result = self._rename_mas_guests(client, admin_token, room_id, members, existing)
|
||||
synapse_result = self._rename_synapse_guests(
|
||||
client,
|
||||
seeder_token,
|
||||
existing,
|
||||
mas_result.usernames,
|
||||
)
|
||||
db_renamed = self._db_rename_numeric(existing)
|
||||
finally:
|
||||
self._mas_revoke_session(client, admin_token, seeder_session)
|
||||
|
||||
renamed = mas_result.renamed + synapse_result.renamed + db_renamed
|
||||
pruned = synapse_result.pruned
|
||||
skipped = mas_result.skipped
|
||||
processed = renamed + pruned + skipped
|
||||
summary = CommsSummary(processed, renamed, pruned, skipped)
|
||||
logger.info(
|
||||
"comms guest name sync finished",
|
||||
extra={
|
||||
"event": "comms_guest_name",
|
||||
"status": "ok",
|
||||
"processed": summary.processed,
|
||||
"renamed": summary.renamed,
|
||||
"pruned": summary.pruned,
|
||||
"skipped": summary.skipped,
|
||||
},
|
||||
)
|
||||
return {"status": "ok", **summary.__dict__}
|
||||
|
||||
def run_pin_invite(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not settings.comms_seeder_password:
|
||||
raise RuntimeError("comms seeder password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login(client, settings.comms_seeder_user, settings.comms_seeder_password)
|
||||
room_id = self._resolve_alias(client, token, settings.comms_room_alias)
|
||||
pinned = self._get_pinned(client, token, room_id)
|
||||
for event_id in pinned:
|
||||
event = self._get_event(client, token, room_id, event_id)
|
||||
if event and (event.get("content") or {}).get("body") == settings.comms_pin_message:
|
||||
return {"status": "ok", "detail": "already pinned"}
|
||||
event_id = self._send_message(client, token, room_id, settings.comms_pin_message)
|
||||
if not event_id:
|
||||
return {"status": "error", "detail": "pin event_id missing"}
|
||||
self._pin_message(client, token, room_id, event_id)
|
||||
return {"status": "ok", "detail": "pinned"}
|
||||
|
||||
def run_reset_room(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not settings.comms_seeder_password:
|
||||
raise RuntimeError("comms seeder password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login_with_retry(client, settings.comms_seeder_user, settings.comms_seeder_password)
|
||||
old_room_id = self._resolve_alias(client, token, settings.comms_room_alias)
|
||||
new_room_id = self._create_room(client, token, settings.comms_room_name)
|
||||
self._set_room_state(client, token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
|
||||
self._set_room_state(client, token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
new_room_id,
|
||||
"m.room.history_visibility",
|
||||
{"history_visibility": "shared"},
|
||||
)
|
||||
self._set_room_state(client, token, new_room_id, "m.room.power_levels", self._power_levels())
|
||||
|
||||
self._delete_alias(client, token, settings.comms_room_alias)
|
||||
self._put_alias(client, token, settings.comms_room_alias, new_room_id)
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
new_room_id,
|
||||
"m.room.canonical_alias",
|
||||
{"alias": settings.comms_room_alias},
|
||||
)
|
||||
self._set_directory_visibility(client, token, new_room_id, "public")
|
||||
|
||||
bot_user_id = _canon_user(settings.comms_bot_user, settings.comms_server_name)
|
||||
self._invite_user(client, token, new_room_id, bot_user_id)
|
||||
for uid in self._list_joined_members(client, token, old_room_id):
|
||||
if uid == _canon_user(settings.comms_seeder_user, settings.comms_server_name):
|
||||
continue
|
||||
localpart = uid.split(":", 1)[0].lstrip("@")
|
||||
if localpart.isdigit():
|
||||
continue
|
||||
self._invite_user(client, token, new_room_id, uid)
|
||||
|
||||
event_id = self._send_message(client, token, new_room_id, settings.comms_pin_message)
|
||||
if not event_id:
|
||||
raise RuntimeError("pin message event_id missing")
|
||||
self._set_room_state(client, token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})
|
||||
|
||||
self._set_directory_visibility(client, token, old_room_id, "private")
|
||||
self._set_room_state(client, token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
|
||||
self._set_room_state(client, token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
old_room_id,
|
||||
"m.room.tombstone",
|
||||
{
|
||||
"body": "Othrys has been reset. Please join the new room.",
|
||||
"replacement_room": new_room_id,
|
||||
},
|
||||
)
|
||||
self._send_message(
|
||||
client,
|
||||
token,
|
||||
old_room_id,
|
||||
"Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
|
||||
)
|
||||
|
||||
return {"status": "ok", "detail": f"old_room_id={old_room_id} new_room_id={new_room_id}"}
|
||||
|
||||
def run_seed_room(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not settings.comms_seeder_password or not settings.comms_bot_password:
|
||||
raise RuntimeError("comms seeder/bot password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login(client, settings.comms_seeder_user, settings.comms_seeder_password)
|
||||
for user, password, admin in (
|
||||
(settings.comms_seeder_user, settings.comms_seeder_password, True),
|
||||
(settings.comms_bot_user, settings.comms_bot_password, False),
|
||||
):
|
||||
try:
|
||||
self._ensure_user(client, token, user, password, admin)
|
||||
except RuntimeError as exc:
|
||||
message = str(exc)
|
||||
if "You are not a server admin" in message:
|
||||
logger.warning(
|
||||
"comms seed room ensure skipped",
|
||||
extra={"event": "comms_seed_room", "user": user, "detail": message},
|
||||
)
|
||||
continue
|
||||
raise
|
||||
room_id = self._ensure_room(client, token)
|
||||
self._join_user(client, token, room_id, _canon_user(settings.comms_bot_user, settings.comms_server_name))
|
||||
self._join_all_locals(client, token, room_id)
|
||||
return {"status": "ok", "detail": "room seeded"}
|
||||
|
||||
def _login(self, client: httpx.Client, user: str, password: str) -> str:
|
||||
resp = client.post(
|
||||
f"{settings.comms_auth_base}/_matrix/client/v3/login",
|
||||
json={
|
||||
"type": "m.login.password",
|
||||
"identifier": {"type": "m.id.user", "user": _canon_user(user, settings.comms_server_name)},
|
||||
"password": password,
|
||||
},
|
||||
)
|
||||
if resp.status_code != HTTP_OK:
|
||||
raise RuntimeError(f"login failed: {resp.status_code} {resp.text}")
|
||||
payload = resp.json()
|
||||
token = payload.get("access_token")
|
||||
if not isinstance(token, str) or not token:
|
||||
raise RuntimeError("login missing token")
|
||||
return token
|
||||
|
||||
def _login_with_retry(self, client: httpx.Client, user: str, password: str) -> str:
|
||||
last: Exception | None = None
|
||||
for attempt in range(1, 6):
|
||||
try:
|
||||
return self._login(client, user, password)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
last = exc
|
||||
time.sleep(attempt * 2)
|
||||
raise RuntimeError(str(last) if last else "login failed")
|
||||
|
||||
def _get_pinned(self, client: httpx.Client, token: str, room_id: str) -> list[str]:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return []
|
||||
resp.raise_for_status()
|
||||
pinned = resp.json().get("pinned", [])
|
||||
return [item for item in pinned if isinstance(item, str)]
|
||||
|
||||
def _get_event(self, client: httpx.Client, token: str, room_id: str, event_id: str) -> dict[str, Any] | None:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def _send_message(self, client: httpx.Client, token: str, room_id: str, body: str) -> str:
|
||||
resp = client.post(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
|
||||
headers=_auth(token),
|
||||
json={"msgtype": "m.text", "body": body},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
event_id = payload.get("event_id")
|
||||
return event_id if isinstance(event_id, str) else ""
|
||||
|
||||
def _pin_message(self, client: httpx.Client, token: str, room_id: str, event_id: str) -> None:
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
|
||||
headers=_auth(token),
|
||||
json={"pinned": [event_id]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _create_room(self, client: httpx.Client, token: str, name: str) -> str:
|
||||
resp = client.post(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/createRoom",
|
||||
headers=_auth(token),
|
||||
json={"preset": "public_chat", "name": name, "room_version": "11"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()["room_id"]
|
||||
|
||||
def _set_room_state(self, client: httpx.Client, token: str, room_id: str, ev_type: str, content: dict[str, Any]) -> None:
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
|
||||
headers=_auth(token),
|
||||
json=content,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _set_directory_visibility(self, client: httpx.Client, token: str, room_id: str, visibility: str) -> None:
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
|
||||
headers=_auth(token),
|
||||
json={"visibility": visibility},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _delete_alias(self, client: httpx.Client, token: str, alias: str) -> None:
|
||||
resp = client.delete(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED, HTTP_NOT_FOUND):
|
||||
return
|
||||
resp.raise_for_status()
|
||||
|
||||
def _put_alias(self, client: httpx.Client, token: str, alias: str, room_id: str) -> None:
|
||||
resp = client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
json={"room_id": room_id},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _list_joined_members(self, client: httpx.Client, token: str, room_id: str) -> list[str]:
|
||||
resp = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
members = []
|
||||
for ev in resp.json().get("chunk", []) or []:
|
||||
if ev.get("type") != "m.room.member":
|
||||
continue
|
||||
uid = ev.get("state_key")
|
||||
if isinstance(uid, str) and uid.startswith("@"):
|
||||
members.append(uid)
|
||||
return members
|
||||
|
||||
def _invite_user(self, client: httpx.Client, token: str, room_id: str, user_id: str) -> None:
|
||||
resp = client.post(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
|
||||
headers=_auth(token),
|
||||
json={"user_id": user_id},
|
||||
)
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED):
|
||||
return
|
||||
resp.raise_for_status()
|
||||
|
||||
def _power_levels(self) -> dict[str, Any]:
|
||||
return {
|
||||
"ban": 50,
|
||||
"events": {
|
||||
"m.room.avatar": 50,
|
||||
"m.room.canonical_alias": 50,
|
||||
"m.room.encryption": 100,
|
||||
"m.room.history_visibility": 100,
|
||||
"m.room.name": 50,
|
||||
"m.room.power_levels": 100,
|
||||
"m.room.server_acl": 100,
|
||||
"m.room.tombstone": 100,
|
||||
},
|
||||
"events_default": 0,
|
||||
"historical": 100,
|
||||
"invite": 50,
|
||||
"kick": 50,
|
||||
"m.call.invite": 50,
|
||||
"redact": 50,
|
||||
"state_default": 50,
|
||||
"users": { _canon_user(settings.comms_seeder_user, settings.comms_server_name): 100 },
|
||||
"users_default": 0,
|
||||
}
|
||||
|
||||
def _ensure_user(self, client: httpx.Client, token: str, localpart: str, password: str, admin: bool) -> None:
|
||||
admin_token = self._admin_token(token)
|
||||
user_id = _canon_user(localpart, settings.comms_server_name)
|
||||
url = f"{settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
if resp.status_code == HTTP_OK:
|
||||
return
|
||||
payload = {"password": password, "admin": admin, "deactivated": False}
|
||||
create = client.put(url, headers=_auth(admin_token), json=payload)
|
||||
if create.status_code not in (HTTP_OK, HTTP_CREATED):
|
||||
raise RuntimeError(f"create user {user_id} failed: {create.status_code} {create.text}")
|
||||
|
||||
def _ensure_room(self, client: httpx.Client, token: str) -> str:
|
||||
alias = settings.comms_room_alias
|
||||
alias_enc = urllib.parse.quote(alias)
|
||||
exists = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/room/{alias_enc}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if exists.status_code == HTTP_OK:
|
||||
room_id = exists.json()["room_id"]
|
||||
else:
|
||||
create = client.post(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/createRoom",
|
||||
headers=_auth(token),
|
||||
json={
|
||||
"preset": "public_chat",
|
||||
"name": settings.comms_room_name,
|
||||
"room_alias_name": alias.split(":", 1)[0].lstrip("#"),
|
||||
"initial_state": [],
|
||||
"power_level_content_override": {
|
||||
"events_default": 0,
|
||||
"users_default": 0,
|
||||
"state_default": 50,
|
||||
},
|
||||
},
|
||||
)
|
||||
if create.status_code not in (HTTP_OK, HTTP_CONFLICT):
|
||||
raise RuntimeError(f"create room failed: {create.status_code} {create.text}")
|
||||
exists = client.get(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/room/{alias_enc}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
room_id = exists.json()["room_id"]
|
||||
|
||||
state_events = [
|
||||
("m.room.join_rules", {"join_rule": "public"}),
|
||||
("m.room.guest_access", {"guest_access": "can_join"}),
|
||||
("m.room.history_visibility", {"history_visibility": "shared"}),
|
||||
("m.room.canonical_alias", {"alias": alias}),
|
||||
]
|
||||
for ev_type, content in state_events:
|
||||
client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}",
|
||||
headers=_auth(token),
|
||||
json=content,
|
||||
)
|
||||
client.put(
|
||||
f"{settings.comms_synapse_base}/_matrix/client/v3/directory/list/room/{room_id}",
|
||||
headers=_auth(token),
|
||||
json={"visibility": "public"},
|
||||
)
|
||||
return room_id
|
||||
|
||||
def _join_user(self, client: httpx.Client, token: str, room_id: str, user_id: str) -> None:
|
||||
admin_token = self._admin_token(token)
|
||||
client.post(
|
||||
f"{settings.comms_synapse_base}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}",
|
||||
headers=_auth(admin_token),
|
||||
json={"user_id": user_id},
|
||||
)
|
||||
|
||||
def _join_all_locals(self, client: httpx.Client, token: str, room_id: str) -> None:
|
||||
users: list[str] = []
|
||||
from_token = None
|
||||
admin_token = self._admin_token(token)
|
||||
while True:
|
||||
url = f"{settings.comms_synapse_base}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
|
||||
if from_token:
|
||||
url += f"&from={from_token}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
payload = resp.json()
|
||||
users.extend([u["name"] for u in payload.get("users", []) if isinstance(u, dict) and u.get("name")])
|
||||
from_token = payload.get("next_token")
|
||||
if not from_token:
|
||||
break
|
||||
for uid in users:
|
||||
self._join_user(client, token, room_id, uid)
|
||||
def _time(self) -> float:
|
||||
return time.time()
|
||||
|
||||
|
||||
comms = CommsService()
|
||||
|
||||
__all__ = ["CommsService", "_canon_user", "comms", "psycopg", "settings"]
|
||||
|
||||
454
ariadne/services/comms_guest_names.py
Normal file
454
ariadne/services/comms_guest_names.py
Normal file
@ -0,0 +1,454 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
from typing import Any
|
||||
import urllib.parse
|
||||
|
||||
import httpx
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
from .comms_protocol import (
|
||||
HTTP_ACCEPTED,
|
||||
HTTP_CREATED,
|
||||
HTTP_NO_CONTENT,
|
||||
HTTP_NOT_FOUND,
|
||||
HTTP_OK,
|
||||
CommsSummary,
|
||||
DisplayNameTarget,
|
||||
MasGuestResult,
|
||||
SynapseGuestResult,
|
||||
SynapseUserRef,
|
||||
_auth,
|
||||
_needs_rename_display,
|
||||
_needs_rename_username,
|
||||
)
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _CommsGuestNameMixin:
|
||||
def _mas_admin_token(self, client: httpx.Client) -> str:
|
||||
settings = self._settings
|
||||
if not settings.comms_mas_admin_client_id or not settings.comms_mas_admin_client_secret:
|
||||
raise RuntimeError("mas admin client credentials missing")
|
||||
basic = base64.b64encode(
|
||||
f"{settings.comms_mas_admin_client_id}:{settings.comms_mas_admin_client_secret}".encode()
|
||||
).decode()
|
||||
last_err: Exception | None = None
|
||||
for attempt in range(5):
|
||||
try:
|
||||
resp = client.post(
|
||||
settings.comms_mas_token_url,
|
||||
headers={"Authorization": f"Basic {basic}"},
|
||||
data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
token = payload.get("access_token")
|
||||
if not isinstance(token, str) or not token:
|
||||
raise RuntimeError("missing mas access token")
|
||||
return token
|
||||
except Exception as exc: # noqa: BLE001
|
||||
last_err = exc
|
||||
self._sleep(2**attempt)
|
||||
raise RuntimeError(str(last_err) if last_err else "mas admin token failed")
|
||||
|
||||
def _mas_user_id(self, client: httpx.Client, token: str, username: str) -> str:
|
||||
url = f"{self._settings.comms_mas_admin_api_base}/users/by-username/{urllib.parse.quote(username)}"
|
||||
resp = client.get(url, headers=_auth(token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
return payload["data"]["id"]
|
||||
|
||||
def _mas_personal_session(self, client: httpx.Client, token: str, user_id: str) -> tuple[str, str]:
|
||||
resp = client.post(
|
||||
f"{self._settings.comms_mas_admin_api_base}/personal-sessions",
|
||||
headers=_auth(token),
|
||||
json={
|
||||
"actor_user_id": user_id,
|
||||
"human_name": "guest-name-randomizer",
|
||||
"scope": "urn:matrix:client:api:*",
|
||||
"expires_in": 300,
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json().get("data", {})
|
||||
session_id = payload.get("id")
|
||||
attrs = (payload.get("attributes") or {}) if isinstance(payload, dict) else {}
|
||||
access_token = attrs.get("access_token")
|
||||
if not isinstance(access_token, str) or not isinstance(session_id, str):
|
||||
raise RuntimeError("invalid personal session response")
|
||||
return access_token, session_id
|
||||
|
||||
def _mas_revoke_session(self, client: httpx.Client, token: str, session_id: str) -> None:
|
||||
try:
|
||||
client.post(
|
||||
f"{self._settings.comms_mas_admin_api_base}/personal-sessions/{urllib.parse.quote(session_id)}/revoke",
|
||||
headers=_auth(token),
|
||||
json={},
|
||||
)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def _room_members(self, client: httpx.Client, token: str, room_id: str) -> tuple[set[str], set[str]]:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
members: set[str] = set()
|
||||
existing: set[str] = set()
|
||||
for ev in payload.get("chunk", []) or []:
|
||||
user_id = ev.get("state_key")
|
||||
if isinstance(user_id, str) and user_id:
|
||||
members.add(user_id)
|
||||
display = (ev.get("content") or {}).get("displayname")
|
||||
if isinstance(display, str) and display:
|
||||
existing.add(display)
|
||||
return members, existing
|
||||
|
||||
def _mas_list_users(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
users: list[dict[str, Any]] = []
|
||||
cursor = None
|
||||
while True:
|
||||
url = f"{self._settings.comms_mas_admin_api_base}/users?page[size]=100"
|
||||
if cursor:
|
||||
url += f"&page[after]={urllib.parse.quote(cursor)}"
|
||||
resp = client.get(url, headers=_auth(token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
data = payload.get("data") or []
|
||||
if not isinstance(data, list) or not data:
|
||||
break
|
||||
users.extend([item for item in data if isinstance(item, dict)])
|
||||
last = data[-1]
|
||||
cursor = (
|
||||
last.get("meta", {})
|
||||
if isinstance(last, dict)
|
||||
else {}
|
||||
).get("page", {}).get("cursor")
|
||||
if not cursor:
|
||||
break
|
||||
return users
|
||||
|
||||
def _synapse_list_users(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
users: list[dict[str, Any]] = []
|
||||
from_token = None
|
||||
admin_token = self._admin_token(token)
|
||||
while True:
|
||||
url = "{}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100".format(
|
||||
self._settings.comms_synapse_base
|
||||
)
|
||||
if from_token:
|
||||
url += f"&from={urllib.parse.quote(from_token)}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
users.extend([item for item in payload.get("users", []) if isinstance(item, dict)])
|
||||
from_token = payload.get("next_token")
|
||||
if not from_token:
|
||||
break
|
||||
return users
|
||||
|
||||
def _should_prune_guest(self, entry: dict[str, Any], now_ms: int) -> bool:
|
||||
if not entry.get("is_guest"):
|
||||
return False
|
||||
last_seen = entry.get("last_seen_ts")
|
||||
if last_seen is None:
|
||||
return False
|
||||
try:
|
||||
last_seen = int(last_seen)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
stale_ms = int(self._settings.comms_guest_stale_days) * 24 * 60 * 60 * 1000
|
||||
return now_ms - last_seen > stale_ms
|
||||
|
||||
def _prune_guest(self, client: httpx.Client, token: str, user_id: str) -> bool:
|
||||
admin_token = self._admin_token(token)
|
||||
try:
|
||||
resp = client.delete(
|
||||
f"{self._settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
params={"erase": "true"},
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.info(
|
||||
"guest prune failed",
|
||||
extra={"event": "comms_guest_prune", "status": "error", "detail": str(exc)},
|
||||
)
|
||||
return False
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED, HTTP_NO_CONTENT, HTTP_NOT_FOUND):
|
||||
return True
|
||||
logger.info(
|
||||
"guest prune failed",
|
||||
extra={
|
||||
"event": "comms_guest_prune",
|
||||
"status": "error",
|
||||
"detail": f"{resp.status_code} {resp.text}",
|
||||
},
|
||||
)
|
||||
return False
|
||||
|
||||
def _get_displayname(self, client: httpx.Client, token: str, user_id: str) -> str | None:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("displayname")
|
||||
|
||||
def _get_displayname_admin(self, client: httpx.Client, token: str, user_id: str) -> str | None:
|
||||
admin_token = self._admin_token(token)
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("displayname")
|
||||
|
||||
def _set_displayname(self, client: httpx.Client, token: str, target: DisplayNameTarget) -> None:
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/profile/{urllib.parse.quote(target.user_id)}/displayname",
|
||||
headers=_auth(token),
|
||||
json={"displayname": target.name},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
if not target.in_room:
|
||||
return
|
||||
state_url = (
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(target.room_id)}"
|
||||
f"/state/m.room.member/{urllib.parse.quote(target.user_id)}"
|
||||
)
|
||||
client.put(
|
||||
state_url,
|
||||
headers=_auth(token),
|
||||
json={"membership": "join", "displayname": target.name},
|
||||
)
|
||||
|
||||
def _set_displayname_admin(self, client: httpx.Client, token: str, user_id: str, name: str) -> bool:
|
||||
admin_token = self._admin_token(token)
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=_auth(admin_token),
|
||||
json={"displayname": name},
|
||||
)
|
||||
return resp.status_code in (HTTP_OK, HTTP_CREATED, HTTP_NO_CONTENT)
|
||||
|
||||
def _db_rename_numeric(self, existing: set[str]) -> int:
|
||||
settings = self._settings
|
||||
if not settings.comms_synapse_db_password:
|
||||
return 0
|
||||
renamed = 0
|
||||
conn = self._connect_synapse_db()
|
||||
try:
|
||||
with conn:
|
||||
with conn.cursor() as cur:
|
||||
pattern = f"^@\\d+:{settings.comms_server_name}$"
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
|
||||
(pattern,),
|
||||
)
|
||||
profile_rows = cur.fetchall()
|
||||
profile_index = {row[1]: row for row in profile_rows}
|
||||
for _user_id, full_user_id, display in profile_rows:
|
||||
if display and not _needs_rename_display(display):
|
||||
continue
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
continue
|
||||
cur.execute(
|
||||
"UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
|
||||
(new_name, full_user_id),
|
||||
)
|
||||
renamed += 1
|
||||
|
||||
cur.execute(
|
||||
"SELECT name FROM users WHERE name ~ %s",
|
||||
(pattern,),
|
||||
)
|
||||
users = [row[0] for row in cur.fetchall()]
|
||||
if not users:
|
||||
return renamed
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
|
||||
(users,),
|
||||
)
|
||||
for existing_full in cur.fetchall():
|
||||
profile_index.setdefault(existing_full[1], existing_full)
|
||||
|
||||
for full_user_id in users:
|
||||
if full_user_id in profile_index:
|
||||
continue
|
||||
localpart = full_user_id.split(":", 1)[0].lstrip("@")
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
continue
|
||||
cur.execute(
|
||||
"INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
|
||||
"ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
|
||||
(localpart, new_name, full_user_id),
|
||||
)
|
||||
renamed += 1
|
||||
finally:
|
||||
conn.close()
|
||||
return renamed
|
||||
|
||||
def _validate_guest_name_settings(self) -> None:
|
||||
if not self._settings.comms_mas_admin_client_id or not self._settings.comms_mas_admin_client_secret:
|
||||
raise RuntimeError("comms mas admin secret missing")
|
||||
if not self._settings.comms_synapse_base:
|
||||
raise RuntimeError("comms synapse base missing")
|
||||
|
||||
def _room_context(self, client: httpx.Client, token: str) -> tuple[str, set[str], set[str]]:
|
||||
room_id = self._resolve_alias(client, token, self._settings.comms_room_alias)
|
||||
members, existing = self._room_members(client, token, room_id)
|
||||
return room_id, members, existing
|
||||
|
||||
def _rename_mas_guests(self, client: httpx.Client, admin_token: str, room_id: str, members: set[str], existing: set[str]) -> MasGuestResult:
|
||||
renamed = 0
|
||||
skipped = 0
|
||||
mas_usernames: set[str] = set()
|
||||
users = self._mas_list_users(client, admin_token)
|
||||
for user in users:
|
||||
attrs = user.get("attributes") or {}
|
||||
username = attrs.get("username") or ""
|
||||
if isinstance(username, str) and username:
|
||||
mas_usernames.add(username)
|
||||
legacy_guest = attrs.get("legacy_guest")
|
||||
if not isinstance(username, str) or not username:
|
||||
skipped += 1
|
||||
continue
|
||||
if not (legacy_guest or _needs_rename_username(username)):
|
||||
skipped += 1
|
||||
continue
|
||||
user_id = user.get("id")
|
||||
if not isinstance(user_id, str) or not user_id:
|
||||
skipped += 1
|
||||
continue
|
||||
full_user = f"@{username}:{self._settings.comms_server_name}"
|
||||
access_token, session_id = self._mas_personal_session(client, admin_token, user_id)
|
||||
try:
|
||||
display = self._get_displayname(client, access_token, full_user)
|
||||
if display and not _needs_rename_display(display):
|
||||
skipped += 1
|
||||
continue
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
skipped += 1
|
||||
continue
|
||||
self._set_displayname(
|
||||
client,
|
||||
access_token,
|
||||
DisplayNameTarget(
|
||||
room_id=room_id,
|
||||
user_id=full_user,
|
||||
name=new_name,
|
||||
in_room=full_user in members,
|
||||
),
|
||||
)
|
||||
renamed += 1
|
||||
finally:
|
||||
self._mas_revoke_session(client, admin_token, session_id)
|
||||
return MasGuestResult(renamed=renamed, skipped=skipped, usernames=mas_usernames)
|
||||
|
||||
def _synapse_entries(self, client: httpx.Client, token: str) -> list[dict[str, Any]]:
|
||||
try:
|
||||
return self._synapse_list_users(client, token)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
logger.info(
|
||||
"synapse admin list skipped",
|
||||
extra={"event": "comms_guest_list", "status": "error", "detail": str(exc)},
|
||||
)
|
||||
return []
|
||||
|
||||
def _synapse_user_id(self, entry: dict[str, Any]) -> SynapseUserRef | None:
|
||||
user_id = entry.get("name") or ""
|
||||
if not isinstance(user_id, str) or not user_id.startswith("@"):
|
||||
return None
|
||||
localpart = user_id.split(":", 1)[0].lstrip("@")
|
||||
return SynapseUserRef(entry=entry, user_id=user_id, localpart=localpart)
|
||||
|
||||
def _maybe_prune_synapse_guest(self, client: httpx.Client, token: str, entry: dict[str, Any], user_id: str, now_ms: int) -> bool:
|
||||
if not entry.get("is_guest"):
|
||||
return False
|
||||
if not self._should_prune_guest(entry, now_ms):
|
||||
return False
|
||||
return self._prune_guest(client, token, user_id)
|
||||
|
||||
def _needs_synapse_rename(self, client: httpx.Client, token: str, user: SynapseUserRef, mas_usernames: set[str]) -> bool:
|
||||
if user.localpart in mas_usernames:
|
||||
return False
|
||||
is_guest = user.entry.get("is_guest")
|
||||
if not (is_guest or _needs_rename_username(user.localpart)):
|
||||
return False
|
||||
display = self._get_displayname_admin(client, token, user.user_id)
|
||||
if display and not _needs_rename_display(display):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _rename_synapse_user(self, client: httpx.Client, token: str, existing: set[str], user_id: str) -> bool:
|
||||
new_name = self._pick_guest_name(existing)
|
||||
if not new_name:
|
||||
return False
|
||||
return self._set_displayname_admin(client, token, user_id, new_name)
|
||||
|
||||
def _rename_synapse_guests(self, client: httpx.Client, token: str, existing: set[str], mas_usernames: set[str]) -> SynapseGuestResult:
|
||||
renamed = 0
|
||||
pruned = 0
|
||||
entries = self._synapse_entries(client, token)
|
||||
|
||||
now_ms = int(self._time() * 1000)
|
||||
for entry in entries:
|
||||
user_ref = self._synapse_user_id(entry)
|
||||
if not user_ref:
|
||||
continue
|
||||
if self._maybe_prune_synapse_guest(client, token, user_ref.entry, user_ref.user_id, now_ms):
|
||||
pruned += 1
|
||||
continue
|
||||
if not self._needs_synapse_rename(client, token, user_ref, mas_usernames):
|
||||
continue
|
||||
if self._rename_synapse_user(client, token, existing, user_ref.user_id):
|
||||
renamed += 1
|
||||
return SynapseGuestResult(renamed=renamed, pruned=pruned)
|
||||
|
||||
def run_guest_name_randomizer(self, wait: bool = True) -> dict[str, Any]:
|
||||
self._validate_guest_name_settings()
|
||||
|
||||
with self._client() as client:
|
||||
admin_token = self._mas_admin_token(client)
|
||||
seeder_id = self._mas_user_id(client, admin_token, self._settings.comms_seeder_user)
|
||||
seeder_token, seeder_session = self._mas_personal_session(client, admin_token, seeder_id)
|
||||
try:
|
||||
room_id, members, existing = self._room_context(client, seeder_token)
|
||||
mas_result = self._rename_mas_guests(client, admin_token, room_id, members, existing)
|
||||
synapse_result = self._rename_synapse_guests(
|
||||
client,
|
||||
seeder_token,
|
||||
existing,
|
||||
mas_result.usernames,
|
||||
)
|
||||
db_renamed = self._db_rename_numeric(existing)
|
||||
finally:
|
||||
self._mas_revoke_session(client, admin_token, seeder_session)
|
||||
|
||||
renamed = mas_result.renamed + synapse_result.renamed + db_renamed
|
||||
pruned = synapse_result.pruned
|
||||
skipped = mas_result.skipped
|
||||
processed = renamed + pruned + skipped
|
||||
summary = CommsSummary(processed, renamed, pruned, skipped)
|
||||
logger.info(
|
||||
"comms guest name sync finished",
|
||||
extra={
|
||||
"event": "comms_guest_name",
|
||||
"status": "ok",
|
||||
"processed": summary.processed,
|
||||
"renamed": summary.renamed,
|
||||
"pruned": summary.pruned,
|
||||
"skipped": summary.skipped,
|
||||
},
|
||||
)
|
||||
return {"status": "ok", **summary.__dict__}
|
||||
72
ariadne/services/comms_protocol.py
Normal file
72
ariadne/services/comms_protocol.py
Normal file
@ -0,0 +1,72 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
HTTP_OK = 200
|
||||
HTTP_CREATED = 201
|
||||
HTTP_ACCEPTED = 202
|
||||
HTTP_NO_CONTENT = 204
|
||||
HTTP_NOT_FOUND = 404
|
||||
HTTP_CONFLICT = 409
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CommsSummary:
|
||||
processed: int
|
||||
renamed: int
|
||||
pruned: int
|
||||
skipped: int
|
||||
detail: str = ""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class MasGuestResult:
|
||||
renamed: int
|
||||
skipped: int
|
||||
usernames: set[str]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SynapseGuestResult:
|
||||
renamed: int
|
||||
pruned: int
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class DisplayNameTarget:
|
||||
room_id: str
|
||||
user_id: str
|
||||
name: str
|
||||
in_room: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SynapseUserRef:
|
||||
entry: dict[str, Any]
|
||||
user_id: str
|
||||
localpart: str
|
||||
|
||||
|
||||
def _auth(token: str) -> dict[str, str]:
|
||||
return {"Authorization": f"Bearer {token}"}
|
||||
|
||||
|
||||
def _canon_user(user: str, server_name: str) -> str:
|
||||
user = (user or "").strip()
|
||||
if user.startswith("@") and ":" in user:
|
||||
return user
|
||||
user = user.lstrip("@")
|
||||
if ":" in user:
|
||||
return f"@{user}"
|
||||
return f"@{user}:{server_name}"
|
||||
|
||||
|
||||
def _needs_rename_username(username: str) -> bool:
|
||||
return username.isdigit() or username.startswith("guest-")
|
||||
|
||||
|
||||
def _needs_rename_display(display: str | None) -> bool:
|
||||
if not display:
|
||||
return True
|
||||
return display.isdigit() or display.startswith("guest-")
|
||||
389
ariadne/services/comms_room_ops.py
Normal file
389
ariadne/services/comms_room_ops.py
Normal file
@ -0,0 +1,389 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
import urllib.parse
|
||||
|
||||
import httpx
|
||||
|
||||
from ..utils.logging import get_logger
|
||||
from .comms_protocol import (
|
||||
HTTP_ACCEPTED,
|
||||
HTTP_CONFLICT,
|
||||
HTTP_CREATED,
|
||||
HTTP_NOT_FOUND,
|
||||
HTTP_OK,
|
||||
_auth,
|
||||
_canon_user,
|
||||
)
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
class _CommsRoomOpsMixin:
|
||||
def run_pin_invite(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not self._settings.comms_seeder_password:
|
||||
raise RuntimeError("comms seeder password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login(client, self._settings.comms_seeder_user, self._settings.comms_seeder_password)
|
||||
room_id = self._resolve_alias(client, token, self._settings.comms_room_alias)
|
||||
pinned = self._get_pinned(client, token, room_id)
|
||||
for event_id in pinned:
|
||||
event = self._get_event(client, token, room_id, event_id)
|
||||
if event and (event.get("content") or {}).get("body") == self._settings.comms_pin_message:
|
||||
return {"status": "ok", "detail": "already pinned"}
|
||||
event_id = self._send_message(client, token, room_id, self._settings.comms_pin_message)
|
||||
if not event_id:
|
||||
return {"status": "error", "detail": "pin event_id missing"}
|
||||
self._pin_message(client, token, room_id, event_id)
|
||||
return {"status": "ok", "detail": "pinned"}
|
||||
|
||||
def run_reset_room(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not self._settings.comms_seeder_password:
|
||||
raise RuntimeError("comms seeder password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login_with_retry(client, self._settings.comms_seeder_user, self._settings.comms_seeder_password)
|
||||
old_room_id = self._resolve_alias(client, token, self._settings.comms_room_alias)
|
||||
new_room_id = self._create_room(client, token, self._settings.comms_room_name)
|
||||
self._set_room_state(client, token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
|
||||
self._set_room_state(client, token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
new_room_id,
|
||||
"m.room.history_visibility",
|
||||
{"history_visibility": "shared"},
|
||||
)
|
||||
self._set_room_state(client, token, new_room_id, "m.room.power_levels", self._power_levels())
|
||||
|
||||
self._delete_alias(client, token, self._settings.comms_room_alias)
|
||||
self._put_alias(client, token, self._settings.comms_room_alias, new_room_id)
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
new_room_id,
|
||||
"m.room.canonical_alias",
|
||||
{"alias": self._settings.comms_room_alias},
|
||||
)
|
||||
self._set_directory_visibility(client, token, new_room_id, "public")
|
||||
|
||||
bot_user_id = _canon_user(self._settings.comms_bot_user, self._settings.comms_server_name)
|
||||
self._invite_user(client, token, new_room_id, bot_user_id)
|
||||
for uid in self._list_joined_members(client, token, old_room_id):
|
||||
if uid == _canon_user(self._settings.comms_seeder_user, self._settings.comms_server_name):
|
||||
continue
|
||||
localpart = uid.split(":", 1)[0].lstrip("@")
|
||||
if localpart.isdigit():
|
||||
continue
|
||||
self._invite_user(client, token, new_room_id, uid)
|
||||
|
||||
event_id = self._send_message(client, token, new_room_id, self._settings.comms_pin_message)
|
||||
if not event_id:
|
||||
raise RuntimeError("pin message event_id missing")
|
||||
self._set_room_state(client, token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})
|
||||
|
||||
self._set_directory_visibility(client, token, old_room_id, "private")
|
||||
self._set_room_state(client, token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
|
||||
self._set_room_state(client, token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
|
||||
self._set_room_state(
|
||||
client,
|
||||
token,
|
||||
old_room_id,
|
||||
"m.room.tombstone",
|
||||
{
|
||||
"body": "Othrys has been reset. Please join the new room.",
|
||||
"replacement_room": new_room_id,
|
||||
},
|
||||
)
|
||||
self._send_message(
|
||||
client,
|
||||
token,
|
||||
old_room_id,
|
||||
"Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
|
||||
)
|
||||
|
||||
return {"status": "ok", "detail": f"old_room_id={old_room_id} new_room_id={new_room_id}"}
|
||||
|
||||
def run_seed_room(self, wait: bool = True) -> dict[str, Any]:
|
||||
if not self._settings.comms_seeder_password or not self._settings.comms_bot_password:
|
||||
raise RuntimeError("comms seeder/bot password missing")
|
||||
|
||||
with self._client() as client:
|
||||
token = self._login(client, self._settings.comms_seeder_user, self._settings.comms_seeder_password)
|
||||
for user, password, admin in (
|
||||
(self._settings.comms_seeder_user, self._settings.comms_seeder_password, True),
|
||||
(self._settings.comms_bot_user, self._settings.comms_bot_password, False),
|
||||
):
|
||||
try:
|
||||
self._ensure_user(client, token, user, password, admin)
|
||||
except RuntimeError as exc:
|
||||
message = str(exc)
|
||||
if "You are not a server admin" in message:
|
||||
logger.warning(
|
||||
"comms seed room ensure skipped",
|
||||
extra={"event": "comms_seed_room", "user": user, "detail": message},
|
||||
)
|
||||
continue
|
||||
raise
|
||||
room_id = self._ensure_room(client, token)
|
||||
self._join_user(client, token, room_id, _canon_user(self._settings.comms_bot_user, self._settings.comms_server_name))
|
||||
self._join_all_locals(client, token, room_id)
|
||||
return {"status": "ok", "detail": "room seeded"}
|
||||
|
||||
def _login(self, client: httpx.Client, user: str, password: str) -> str:
|
||||
resp = client.post(
|
||||
f"{self._settings.comms_auth_base}/_matrix/client/v3/login",
|
||||
json={
|
||||
"type": "m.login.password",
|
||||
"identifier": {"type": "m.id.user", "user": _canon_user(user, self._settings.comms_server_name)},
|
||||
"password": password,
|
||||
},
|
||||
)
|
||||
if resp.status_code != HTTP_OK:
|
||||
raise RuntimeError(f"login failed: {resp.status_code} {resp.text}")
|
||||
payload = resp.json()
|
||||
token = payload.get("access_token")
|
||||
if not isinstance(token, str) or not token:
|
||||
raise RuntimeError("login missing token")
|
||||
return token
|
||||
|
||||
def _login_with_retry(self, client: httpx.Client, user: str, password: str) -> str:
|
||||
last: Exception | None = None
|
||||
for attempt in range(1, 6):
|
||||
try:
|
||||
return self._login(client, user, password)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
last = exc
|
||||
self._sleep(attempt * 2)
|
||||
raise RuntimeError(str(last) if last else "login failed")
|
||||
|
||||
def _resolve_alias(self, client: httpx.Client, token: str, alias: str) -> str:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
return payload["room_id"]
|
||||
|
||||
def _get_pinned(self, client: httpx.Client, token: str, room_id: str) -> list[str]:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return []
|
||||
resp.raise_for_status()
|
||||
pinned = resp.json().get("pinned", [])
|
||||
return [item for item in pinned if isinstance(item, str)]
|
||||
|
||||
def _get_event(self, client: httpx.Client, token: str, room_id: str, event_id: str) -> dict[str, Any] | None:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code == HTTP_NOT_FOUND:
|
||||
return None
|
||||
resp.raise_for_status()
|
||||
return resp.json()
|
||||
|
||||
def _send_message(self, client: httpx.Client, token: str, room_id: str, body: str) -> str:
|
||||
resp = client.post(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
|
||||
headers=_auth(token),
|
||||
json={"msgtype": "m.text", "body": body},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
payload = resp.json()
|
||||
event_id = payload.get("event_id")
|
||||
return event_id if isinstance(event_id, str) else ""
|
||||
|
||||
def _pin_message(self, client: httpx.Client, token: str, room_id: str, event_id: str) -> None:
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
|
||||
headers=_auth(token),
|
||||
json={"pinned": [event_id]},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _create_room(self, client: httpx.Client, token: str, name: str) -> str:
|
||||
resp = client.post(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/createRoom",
|
||||
headers=_auth(token),
|
||||
json={"preset": "public_chat", "name": name, "room_version": "11"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json()["room_id"]
|
||||
|
||||
def _set_room_state(self, client: httpx.Client, token: str, room_id: str, ev_type: str, content: dict[str, Any]) -> None:
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
|
||||
headers=_auth(token),
|
||||
json=content,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _set_directory_visibility(self, client: httpx.Client, token: str, room_id: str, visibility: str) -> None:
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
|
||||
headers=_auth(token),
|
||||
json={"visibility": visibility},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _delete_alias(self, client: httpx.Client, token: str, alias: str) -> None:
|
||||
resp = client.delete(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED, HTTP_NOT_FOUND):
|
||||
return
|
||||
resp.raise_for_status()
|
||||
|
||||
def _put_alias(self, client: httpx.Client, token: str, alias: str, room_id: str) -> None:
|
||||
resp = client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/room/{urllib.parse.quote(alias)}",
|
||||
headers=_auth(token),
|
||||
json={"room_id": room_id},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
|
||||
def _list_joined_members(self, client: httpx.Client, token: str, room_id: str) -> list[str]:
|
||||
resp = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
|
||||
headers=_auth(token),
|
||||
)
|
||||
resp.raise_for_status()
|
||||
members = []
|
||||
for ev in resp.json().get("chunk", []) or []:
|
||||
if ev.get("type") != "m.room.member":
|
||||
continue
|
||||
uid = ev.get("state_key")
|
||||
if isinstance(uid, str) and uid.startswith("@"):
|
||||
members.append(uid)
|
||||
return members
|
||||
|
||||
def _invite_user(self, client: httpx.Client, token: str, room_id: str, user_id: str) -> None:
|
||||
resp = client.post(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
|
||||
headers=_auth(token),
|
||||
json={"user_id": user_id},
|
||||
)
|
||||
if resp.status_code in (HTTP_OK, HTTP_ACCEPTED):
|
||||
return
|
||||
resp.raise_for_status()
|
||||
|
||||
def _power_levels(self) -> dict[str, Any]:
|
||||
return {
|
||||
"ban": 50,
|
||||
"events": {
|
||||
"m.room.avatar": 50,
|
||||
"m.room.canonical_alias": 50,
|
||||
"m.room.encryption": 100,
|
||||
"m.room.history_visibility": 100,
|
||||
"m.room.name": 50,
|
||||
"m.room.power_levels": 100,
|
||||
"m.room.server_acl": 100,
|
||||
"m.room.tombstone": 100,
|
||||
},
|
||||
"events_default": 0,
|
||||
"historical": 100,
|
||||
"invite": 50,
|
||||
"kick": 50,
|
||||
"m.call.invite": 50,
|
||||
"redact": 50,
|
||||
"state_default": 50,
|
||||
"users": {_canon_user(self._settings.comms_seeder_user, self._settings.comms_server_name): 100},
|
||||
"users_default": 0,
|
||||
}
|
||||
|
||||
def _ensure_user(self, client: httpx.Client, token: str, localpart: str, password: str, admin: bool) -> None:
|
||||
admin_token = self._admin_token(token)
|
||||
user_id = _canon_user(localpart, self._settings.comms_server_name)
|
||||
url = f"{self._settings.comms_synapse_base}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
if resp.status_code == HTTP_OK:
|
||||
return
|
||||
payload = {"password": password, "admin": admin, "deactivated": False}
|
||||
create = client.put(url, headers=_auth(admin_token), json=payload)
|
||||
if create.status_code not in (HTTP_OK, HTTP_CREATED):
|
||||
raise RuntimeError(f"create user {user_id} failed: {create.status_code} {create.text}")
|
||||
|
||||
def _ensure_room(self, client: httpx.Client, token: str) -> str:
|
||||
alias = self._settings.comms_room_alias
|
||||
alias_enc = urllib.parse.quote(alias)
|
||||
exists = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/room/{alias_enc}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
if exists.status_code == HTTP_OK:
|
||||
room_id = exists.json()["room_id"]
|
||||
else:
|
||||
create = client.post(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/createRoom",
|
||||
headers=_auth(token),
|
||||
json={
|
||||
"preset": "public_chat",
|
||||
"name": self._settings.comms_room_name,
|
||||
"room_alias_name": alias.split(":", 1)[0].lstrip("#"),
|
||||
"initial_state": [],
|
||||
"power_level_content_override": {
|
||||
"events_default": 0,
|
||||
"users_default": 0,
|
||||
"state_default": 50,
|
||||
},
|
||||
},
|
||||
)
|
||||
if create.status_code not in (HTTP_OK, HTTP_CONFLICT):
|
||||
raise RuntimeError(f"create room failed: {create.status_code} {create.text}")
|
||||
exists = client.get(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/room/{alias_enc}",
|
||||
headers=_auth(token),
|
||||
)
|
||||
room_id = exists.json()["room_id"]
|
||||
|
||||
state_events = [
|
||||
("m.room.join_rules", {"join_rule": "public"}),
|
||||
("m.room.guest_access", {"guest_access": "can_join"}),
|
||||
("m.room.history_visibility", {"history_visibility": "shared"}),
|
||||
("m.room.canonical_alias", {"alias": alias}),
|
||||
]
|
||||
for ev_type, content in state_events:
|
||||
client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}",
|
||||
headers=_auth(token),
|
||||
json=content,
|
||||
)
|
||||
client.put(
|
||||
f"{self._settings.comms_synapse_base}/_matrix/client/v3/directory/list/room/{room_id}",
|
||||
headers=_auth(token),
|
||||
json={"visibility": "public"},
|
||||
)
|
||||
return room_id
|
||||
|
||||
def _join_user(self, client: httpx.Client, token: str, room_id: str, user_id: str) -> None:
|
||||
admin_token = self._admin_token(token)
|
||||
client.post(
|
||||
f"{self._settings.comms_synapse_base}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}",
|
||||
headers=_auth(admin_token),
|
||||
json={"user_id": user_id},
|
||||
)
|
||||
|
||||
def _join_all_locals(self, client: httpx.Client, token: str, room_id: str) -> None:
|
||||
users: list[str] = []
|
||||
from_token = None
|
||||
admin_token = self._admin_token(token)
|
||||
while True:
|
||||
url = f"{self._settings.comms_synapse_base}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
|
||||
if from_token:
|
||||
url += f"&from={from_token}"
|
||||
resp = client.get(url, headers=_auth(admin_token))
|
||||
payload = resp.json()
|
||||
users.extend([u["name"] for u in payload.get("users", []) if isinstance(u, dict) and u.get("name")])
|
||||
from_token = payload.get("next_token")
|
||||
if not from_token:
|
||||
break
|
||||
for uid in users:
|
||||
self._join_user(client, token, room_id, uid)
|
||||
@ -2,10 +2,8 @@ from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
import textwrap
|
||||
|
||||
import httpx
|
||||
from typing import Any
|
||||
|
||||
from ..k8s.exec import ExecError, PodExecutor
|
||||
from ..k8s.pods import PodSelectionError
|
||||
@ -13,6 +11,8 @@ from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.passwords import random_password
|
||||
from .keycloak_admin import keycloak_admin
|
||||
from .firefly_scripts import FIREFLY_PASSWORD_CHECK_SCRIPT as _FIREFLY_PASSWORD_CHECK_SCRIPT
|
||||
from .firefly_scripts import FIREFLY_SYNC_SCRIPT as _FIREFLY_SYNC_SCRIPT
|
||||
from .mailu import mailu
|
||||
|
||||
|
||||
@ -27,230 +27,6 @@ FIREFLY_PASSWORD_ROTATED_ATTR = "firefly_password_rotated_at"
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
_FIREFLY_SYNC_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use FireflyIII\\Console\\Commands\\Correction\\CreatesGroupMemberships;
|
||||
use FireflyIII\\Models\\Role;
|
||||
use FireflyIII\\Repositories\\User\\UserRepositoryInterface;
|
||||
use FireflyIII\\Support\\Facades\\FireflyConfig;
|
||||
use FireflyIII\\User;
|
||||
use Illuminate\\Contracts\\Console\\Kernel as ConsoleKernel;
|
||||
|
||||
function log_line(string $message): void
|
||||
{
|
||||
fwrite(STDOUT, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function error_line(string $message): void
|
||||
{
|
||||
fwrite(STDERR, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function find_app_root(): string
|
||||
{
|
||||
$candidates = [];
|
||||
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
|
||||
if ($env_root !== '') {
|
||||
$candidates[] = $env_root;
|
||||
}
|
||||
$candidates[] = '/var/www/html';
|
||||
$candidates[] = '/var/www/firefly-iii';
|
||||
$candidates[] = '/app';
|
||||
|
||||
foreach ($candidates as $candidate) {
|
||||
if (!is_dir($candidate)) {
|
||||
continue;
|
||||
}
|
||||
if (file_exists($candidate . '/vendor/autoload.php')) {
|
||||
return $candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
|
||||
$password = (string) getenv('FIREFLY_USER_PASSWORD');
|
||||
|
||||
if ($email === '' || $password === '') {
|
||||
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
$root = find_app_root();
|
||||
if ($root === '') {
|
||||
error_line('firefly app root not found');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
$autoload = $root . '/vendor/autoload.php';
|
||||
$app_bootstrap = $root . '/bootstrap/app.php';
|
||||
|
||||
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
|
||||
error_line('firefly bootstrap files missing');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
require $autoload;
|
||||
$app = require $app_bootstrap;
|
||||
|
||||
$kernel = $app->make(ConsoleKernel::class);
|
||||
$kernel->bootstrap();
|
||||
|
||||
try {
|
||||
FireflyConfig::set('single_user_mode', true);
|
||||
} catch (Throwable $exc) {
|
||||
error_line('failed to enforce single_user_mode: ' . $exc->getMessage());
|
||||
}
|
||||
|
||||
$repository = $app->make(UserRepositoryInterface::class);
|
||||
|
||||
$existing_user = User::where('email', $email)->first();
|
||||
$first_user = User::count() == 0;
|
||||
|
||||
if (!$existing_user) {
|
||||
$existing_user = User::create(
|
||||
[
|
||||
'email' => $email,
|
||||
'password' => bcrypt($password),
|
||||
'blocked' => false,
|
||||
'blocked_code' => null,
|
||||
]
|
||||
);
|
||||
|
||||
if ($first_user) {
|
||||
$role = Role::where('name', 'owner')->first();
|
||||
if ($role) {
|
||||
$existing_user->roles()->attach($role);
|
||||
}
|
||||
}
|
||||
|
||||
log_line(sprintf('created firefly user %s', $email));
|
||||
} else {
|
||||
log_line(sprintf('updating firefly user %s', $email));
|
||||
}
|
||||
|
||||
$existing_user->blocked = false;
|
||||
$existing_user->blocked_code = null;
|
||||
$existing_user->save();
|
||||
|
||||
$repository->changePassword($existing_user, $password);
|
||||
CreatesGroupMemberships::createGroupMembership($existing_user);
|
||||
|
||||
log_line('firefly user sync complete');
|
||||
"""
|
||||
).strip()
|
||||
|
||||
_FIREFLY_PASSWORD_CHECK_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use FireflyIII\\Support\\Facades\\FireflyConfig;
|
||||
use FireflyIII\\User;
|
||||
use Illuminate\\Contracts\\Console\\Kernel as ConsoleKernel;
|
||||
use Illuminate\\Support\\Facades\\Hash;
|
||||
|
||||
function log_line(string $message): void
|
||||
{
|
||||
fwrite(STDOUT, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function error_line(string $message): void
|
||||
{
|
||||
fwrite(STDERR, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function find_app_root(): string
|
||||
{
|
||||
$candidates = [];
|
||||
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
|
||||
if ($env_root !== '') {
|
||||
$candidates[] = $env_root;
|
||||
}
|
||||
$candidates[] = '/var/www/html';
|
||||
$candidates[] = '/var/www/firefly-iii';
|
||||
$candidates[] = '/app';
|
||||
|
||||
foreach ($candidates as $candidate) {
|
||||
if (!is_dir($candidate)) {
|
||||
continue;
|
||||
}
|
||||
if (file_exists($candidate . '/vendor/autoload.php')) {
|
||||
return $candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
|
||||
$username = trim((string) getenv('FIREFLY_USER_USERNAME'));
|
||||
$password = (string) getenv('FIREFLY_USER_PASSWORD');
|
||||
|
||||
if (($email === '' && $username === '') || $password === '') {
|
||||
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_USERNAME or FIREFLY_USER_PASSWORD');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
$root = find_app_root();
|
||||
if ($root === '') {
|
||||
error_line('firefly app root not found');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
$autoload = $root . '/vendor/autoload.php';
|
||||
$app_bootstrap = $root . '/bootstrap/app.php';
|
||||
|
||||
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
|
||||
error_line('firefly bootstrap files missing');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
require $autoload;
|
||||
$app = require $app_bootstrap;
|
||||
|
||||
$kernel = $app->make(ConsoleKernel::class);
|
||||
$kernel->bootstrap();
|
||||
|
||||
try {
|
||||
FireflyConfig::set('single_user_mode', true);
|
||||
} catch (Throwable $exc) {
|
||||
error_line('failed to enforce single_user_mode: ' . $exc->getMessage());
|
||||
}
|
||||
|
||||
if ($email !== '') {
|
||||
$query = User::where('email', $email);
|
||||
} else {
|
||||
$query = User::where('username', $username);
|
||||
}
|
||||
|
||||
if ($email !== '' && $username !== '') {
|
||||
$query = $query->orWhere('username', $username);
|
||||
}
|
||||
|
||||
$existing_user = $query->first();
|
||||
if (!$existing_user) {
|
||||
error_line('firefly user missing');
|
||||
exit(3);
|
||||
}
|
||||
|
||||
if (Hash::check($password, $existing_user->password)) {
|
||||
log_line('password match');
|
||||
exit(0);
|
||||
}
|
||||
|
||||
log_line('password mismatch');
|
||||
exit(1);
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
def _firefly_exec_command() -> str:
|
||||
return f"php <<'PHP'\n{_FIREFLY_SYNC_SCRIPT}\nPHP"
|
||||
|
||||
@ -498,6 +274,8 @@ def _rotation_check_input(username: str) -> tuple[FireflySyncInput | UserSyncOut
|
||||
|
||||
|
||||
class FireflyService:
|
||||
"""Synchronize Keycloak users and password rotations into Firefly."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._executor = PodExecutor(
|
||||
settings.firefly_namespace,
|
||||
|
||||
230
ariadne/services/firefly_scripts.py
Normal file
230
ariadne/services/firefly_scripts.py
Normal file
@ -0,0 +1,230 @@
|
||||
"""Embedded scripts executed inside the firefly application pod."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
|
||||
FIREFLY_SYNC_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use FireflyIII\\Console\\Commands\\Correction\\CreatesGroupMemberships;
|
||||
use FireflyIII\\Models\\Role;
|
||||
use FireflyIII\\Repositories\\User\\UserRepositoryInterface;
|
||||
use FireflyIII\\Support\\Facades\\FireflyConfig;
|
||||
use FireflyIII\\User;
|
||||
use Illuminate\\Contracts\\Console\\Kernel as ConsoleKernel;
|
||||
|
||||
function log_line(string $message): void
|
||||
{
|
||||
fwrite(STDOUT, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function error_line(string $message): void
|
||||
{
|
||||
fwrite(STDERR, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function find_app_root(): string
|
||||
{
|
||||
$candidates = [];
|
||||
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
|
||||
if ($env_root !== '') {
|
||||
$candidates[] = $env_root;
|
||||
}
|
||||
$candidates[] = '/var/www/html';
|
||||
$candidates[] = '/var/www/firefly-iii';
|
||||
$candidates[] = '/app';
|
||||
|
||||
foreach ($candidates as $candidate) {
|
||||
if (!is_dir($candidate)) {
|
||||
continue;
|
||||
}
|
||||
if (file_exists($candidate . '/vendor/autoload.php')) {
|
||||
return $candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
|
||||
$password = (string) getenv('FIREFLY_USER_PASSWORD');
|
||||
|
||||
if ($email === '' || $password === '') {
|
||||
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
$root = find_app_root();
|
||||
if ($root === '') {
|
||||
error_line('firefly app root not found');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
$autoload = $root . '/vendor/autoload.php';
|
||||
$app_bootstrap = $root . '/bootstrap/app.php';
|
||||
|
||||
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
|
||||
error_line('firefly bootstrap files missing');
|
||||
exit(1);
|
||||
}
|
||||
|
||||
require $autoload;
|
||||
$app = require $app_bootstrap;
|
||||
|
||||
$kernel = $app->make(ConsoleKernel::class);
|
||||
$kernel->bootstrap();
|
||||
|
||||
try {
|
||||
FireflyConfig::set('single_user_mode', true);
|
||||
} catch (Throwable $exc) {
|
||||
error_line('failed to enforce single_user_mode: ' . $exc->getMessage());
|
||||
}
|
||||
|
||||
$repository = $app->make(UserRepositoryInterface::class);
|
||||
|
||||
$existing_user = User::where('email', $email)->first();
|
||||
$first_user = User::count() == 0;
|
||||
|
||||
if (!$existing_user) {
|
||||
$existing_user = User::create(
|
||||
[
|
||||
'email' => $email,
|
||||
'password' => bcrypt($password),
|
||||
'blocked' => false,
|
||||
'blocked_code' => null,
|
||||
]
|
||||
);
|
||||
|
||||
if ($first_user) {
|
||||
$role = Role::where('name', 'owner')->first();
|
||||
if ($role) {
|
||||
$existing_user->roles()->attach($role);
|
||||
}
|
||||
}
|
||||
|
||||
log_line(sprintf('created firefly user %s', $email));
|
||||
} else {
|
||||
log_line(sprintf('updating firefly user %s', $email));
|
||||
}
|
||||
|
||||
$existing_user->blocked = false;
|
||||
$existing_user->blocked_code = null;
|
||||
$existing_user->save();
|
||||
|
||||
$repository->changePassword($existing_user, $password);
|
||||
CreatesGroupMemberships::createGroupMembership($existing_user);
|
||||
|
||||
log_line('firefly user sync complete');
|
||||
"""
|
||||
).strip()
|
||||
|
||||
FIREFLY_PASSWORD_CHECK_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
use FireflyIII\\Support\\Facades\\FireflyConfig;
|
||||
use FireflyIII\\User;
|
||||
use Illuminate\\Contracts\\Console\\Kernel as ConsoleKernel;
|
||||
use Illuminate\\Support\\Facades\\Hash;
|
||||
|
||||
function log_line(string $message): void
|
||||
{
|
||||
fwrite(STDOUT, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function error_line(string $message): void
|
||||
{
|
||||
fwrite(STDERR, $message . PHP_EOL);
|
||||
}
|
||||
|
||||
function find_app_root(): string
|
||||
{
|
||||
$candidates = [];
|
||||
$env_root = getenv('FIREFLY_APP_DIR') ?: '';
|
||||
if ($env_root !== '') {
|
||||
$candidates[] = $env_root;
|
||||
}
|
||||
$candidates[] = '/var/www/html';
|
||||
$candidates[] = '/var/www/firefly-iii';
|
||||
$candidates[] = '/app';
|
||||
|
||||
foreach ($candidates as $candidate) {
|
||||
if (!is_dir($candidate)) {
|
||||
continue;
|
||||
}
|
||||
if (file_exists($candidate . '/vendor/autoload.php')) {
|
||||
return $candidate;
|
||||
}
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
|
||||
$username = trim((string) getenv('FIREFLY_USER_USERNAME'));
|
||||
$password = (string) getenv('FIREFLY_USER_PASSWORD');
|
||||
|
||||
if (($email === '' && $username === '') || $password === '') {
|
||||
error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_USERNAME or FIREFLY_USER_PASSWORD');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
$root = find_app_root();
|
||||
if ($root === '') {
|
||||
error_line('firefly app root not found');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
$autoload = $root . '/vendor/autoload.php';
|
||||
$app_bootstrap = $root . '/bootstrap/app.php';
|
||||
|
||||
if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
|
||||
error_line('firefly bootstrap files missing');
|
||||
exit(2);
|
||||
}
|
||||
|
||||
require $autoload;
|
||||
$app = require $app_bootstrap;
|
||||
|
||||
$kernel = $app->make(ConsoleKernel::class);
|
||||
$kernel->bootstrap();
|
||||
|
||||
try {
|
||||
FireflyConfig::set('single_user_mode', true);
|
||||
} catch (Throwable $exc) {
|
||||
error_line('failed to enforce single_user_mode: ' . $exc->getMessage());
|
||||
}
|
||||
|
||||
if ($email !== '') {
|
||||
$query = User::where('email', $email);
|
||||
} else {
|
||||
$query = User::where('username', $username);
|
||||
}
|
||||
|
||||
if ($email !== '' && $username !== '') {
|
||||
$query = $query->orWhere('username', $username);
|
||||
}
|
||||
|
||||
$existing_user = $query->first();
|
||||
if (!$existing_user) {
|
||||
error_line('firefly user missing');
|
||||
exit(3);
|
||||
}
|
||||
|
||||
if (Hash::check($password, $existing_user->password)) {
|
||||
log_line('password match');
|
||||
exit(0);
|
||||
}
|
||||
|
||||
log_line('password mismatch');
|
||||
exit(1);
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
@ -107,6 +107,8 @@ sleep infinity
|
||||
|
||||
|
||||
class ImageSweeperService:
|
||||
"""Create Kubernetes cleanup jobs that prune stale node images."""
|
||||
|
||||
def _job_payload(self, job_name: str) -> dict[str, Any]:
|
||||
job: dict[str, Any] = {
|
||||
"apiVersion": "batch/v1",
|
||||
|
||||
418
ariadne/services/jenkins_build_weather.py
Normal file
418
ariadne/services/jenkins_build_weather.py
Normal file
@ -0,0 +1,418 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
JENKINS_BUILD_WEATHER_RUNS_TOTAL = Counter(
|
||||
"ariadne_jenkins_build_weather_runs_total",
|
||||
"Jenkins build weather collector runs by status",
|
||||
["status"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_LAST_RUN_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_last_run_timestamp_seconds",
|
||||
"Last Jenkins build weather collection timestamp",
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_LAST_SUCCESS_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_last_success_timestamp_seconds",
|
||||
"Last successful Jenkins build weather collection timestamp",
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_LAST_FAILURE_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_last_failure_timestamp_seconds",
|
||||
"Last failed Jenkins build weather collection timestamp",
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOBS_TOTAL = Gauge(
|
||||
"ariadne_jenkins_build_weather_jobs_total",
|
||||
"Jenkins jobs observed in the latest weather collection",
|
||||
["status"],
|
||||
)
|
||||
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_RUN_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_last_run_timestamp_seconds",
|
||||
"Jenkins job last run timestamp",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_SUCCESS_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_last_success_timestamp_seconds",
|
||||
"Jenkins job last success timestamp",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_FAILURE_TS = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_last_failure_timestamp_seconds",
|
||||
"Jenkins job last failure timestamp",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_DURATION_SECONDS = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_last_duration_seconds",
|
||||
"Jenkins job last build duration in seconds",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_STATUS = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_last_status",
|
||||
"Jenkins job last build status (1=success,0=failure,2=running,-1=unknown)",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
JENKINS_BUILD_WEATHER_JOB_HEALTH_SCORE = Gauge(
|
||||
"ariadne_jenkins_build_weather_job_health_score",
|
||||
"Jenkins job weather health score (0-100)",
|
||||
["job", "job_url", "weather_icon"],
|
||||
)
|
||||
|
||||
|
||||
_JENKINS_JOBS_TREE = (
|
||||
"jobs[name,url,color,healthReport[score],lastBuild[result,timestamp,duration],"
|
||||
"lastSuccessfulBuild[timestamp],lastFailedBuild[timestamp],"
|
||||
"jobs[name,url,color,healthReport[score],lastBuild[result,timestamp,duration],"
|
||||
"lastSuccessfulBuild[timestamp],lastFailedBuild[timestamp]]]"
|
||||
)
|
||||
|
||||
_STATUS_VALUES = {
|
||||
"success": 1.0,
|
||||
"failure": 0.0,
|
||||
"running": 2.0,
|
||||
"unknown": -1.0,
|
||||
}
|
||||
|
||||
_JOB_SERIES: set[tuple[str, str, str]] = set()
|
||||
_JOB_SERIES_LOCK = threading.Lock()
|
||||
_JOB_METRICS = (
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_RUN_TS,
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_SUCCESS_TS,
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_FAILURE_TS,
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_DURATION_SECONDS,
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_STATUS,
|
||||
JENKINS_BUILD_WEATHER_JOB_HEALTH_SCORE,
|
||||
)
|
||||
|
||||
_WEATHER_SUNNY_MIN_SCORE = 80
|
||||
_WEATHER_PARTLY_CLOUDY_MIN_SCORE = 60
|
||||
_WEATHER_CLOUDY_MIN_SCORE = 40
|
||||
_WEATHER_RAINY_MIN_SCORE = 20
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class JenkinsBuildWeatherJob:
|
||||
job: str
|
||||
job_url: str
|
||||
weather_icon: str
|
||||
status: str
|
||||
last_run_ts: float
|
||||
last_success_ts: float
|
||||
last_failure_ts: float
|
||||
last_duration_seconds: float
|
||||
health_score: float
|
||||
|
||||
@property
|
||||
def series_key(self) -> tuple[str, str, str]:
|
||||
return (self.job, self.job_url, self.weather_icon)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class JenkinsBuildWeatherSummary:
|
||||
jobs_total: int
|
||||
success_total: int
|
||||
failure_total: int
|
||||
running_total: int
|
||||
unknown_total: int
|
||||
|
||||
|
||||
def _metric_number(value: Any) -> float:
|
||||
if isinstance(value, bool):
|
||||
return 0.0
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
return 0.0
|
||||
|
||||
|
||||
def _millis_to_seconds(value: Any) -> float:
|
||||
raw = _metric_number(value)
|
||||
if raw <= 0:
|
||||
return 0.0
|
||||
return raw / 1000.0
|
||||
|
||||
|
||||
def _jenkins_auth() -> tuple[str, str] | None:
|
||||
username = settings.jenkins_api_user.strip()
|
||||
token = settings.jenkins_api_token.strip()
|
||||
if username and token:
|
||||
return (username, token)
|
||||
return None
|
||||
|
||||
|
||||
def _jenkins_status(job: dict[str, Any]) -> str:
|
||||
last_build = job.get("lastBuild") if isinstance(job.get("lastBuild"), dict) else {}
|
||||
result = str(last_build.get("result") or "").upper().strip()
|
||||
color = str(job.get("color") or "").lower().strip()
|
||||
|
||||
if color.endswith("_anime"):
|
||||
return "running"
|
||||
if result == "SUCCESS":
|
||||
return "success"
|
||||
if result in {"FAILURE", "ABORTED", "UNSTABLE", "NOT_BUILT"}:
|
||||
return "failure"
|
||||
if color.startswith(("blue", "green")):
|
||||
return "success"
|
||||
if color.startswith(("red", "yellow")):
|
||||
return "failure"
|
||||
return "unknown"
|
||||
|
||||
|
||||
def _health_score(job: dict[str, Any], status: str) -> float:
|
||||
reports = job.get("healthReport")
|
||||
if isinstance(reports, list):
|
||||
for report in reports:
|
||||
if not isinstance(report, dict):
|
||||
continue
|
||||
score = _metric_number(report.get("score"))
|
||||
if score >= 0:
|
||||
return max(0.0, min(score, 100.0))
|
||||
if status == "success":
|
||||
return 100.0
|
||||
if status == "running":
|
||||
return 60.0
|
||||
if status == "failure":
|
||||
return 10.0
|
||||
return -1.0
|
||||
|
||||
|
||||
def _weather_icon(score: float) -> str:
|
||||
if score < 0:
|
||||
return "❔"
|
||||
if score >= _WEATHER_SUNNY_MIN_SCORE:
|
||||
return "☀️"
|
||||
if score >= _WEATHER_PARTLY_CLOUDY_MIN_SCORE:
|
||||
return "⛅"
|
||||
if score >= _WEATHER_CLOUDY_MIN_SCORE:
|
||||
return "☁️"
|
||||
if score >= _WEATHER_RAINY_MIN_SCORE:
|
||||
return "🌧️"
|
||||
return "⛈️"
|
||||
|
||||
|
||||
def _flatten_jobs(items: list[Any], prefix: str = "") -> list[dict[str, Any]]:
|
||||
flattened: list[dict[str, Any]] = []
|
||||
for item in items:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
name = item.get("name")
|
||||
if not isinstance(name, str) or not name.strip():
|
||||
continue
|
||||
|
||||
full_name = f"{prefix}/{name}" if prefix else name
|
||||
nested_jobs = item.get("jobs") if isinstance(item.get("jobs"), list) else []
|
||||
if nested_jobs:
|
||||
flattened.extend(_flatten_jobs(nested_jobs, prefix=full_name))
|
||||
|
||||
last_build = item.get("lastBuild")
|
||||
if nested_jobs and not isinstance(last_build, dict):
|
||||
continue
|
||||
|
||||
payload = dict(item)
|
||||
payload["name"] = full_name
|
||||
flattened.append(payload)
|
||||
return flattened
|
||||
|
||||
|
||||
def _parse_job(raw: dict[str, Any]) -> JenkinsBuildWeatherJob | None:
|
||||
job = str(raw.get("name") or "").strip()
|
||||
job_url = str(raw.get("url") or "").strip()
|
||||
if not job or not job_url:
|
||||
return None
|
||||
|
||||
status = _jenkins_status(raw)
|
||||
score = _health_score(raw, status)
|
||||
weather_icon = _weather_icon(score)
|
||||
|
||||
last_build = raw.get("lastBuild") if isinstance(raw.get("lastBuild"), dict) else {}
|
||||
last_success = raw.get("lastSuccessfulBuild") if isinstance(raw.get("lastSuccessfulBuild"), dict) else {}
|
||||
last_failure = raw.get("lastFailedBuild") if isinstance(raw.get("lastFailedBuild"), dict) else {}
|
||||
|
||||
return JenkinsBuildWeatherJob(
|
||||
job=job,
|
||||
job_url=job_url,
|
||||
weather_icon=weather_icon,
|
||||
status=status if status in _STATUS_VALUES else "unknown",
|
||||
last_run_ts=_millis_to_seconds(last_build.get("timestamp")),
|
||||
last_success_ts=_millis_to_seconds(last_success.get("timestamp")),
|
||||
last_failure_ts=_millis_to_seconds(last_failure.get("timestamp")),
|
||||
last_duration_seconds=_metric_number(last_build.get("duration")) / 1000.0,
|
||||
health_score=score,
|
||||
)
|
||||
|
||||
|
||||
def _fetch_jobs() -> list[JenkinsBuildWeatherJob]:
|
||||
base_url = settings.jenkins_base_url.strip().rstrip("/")
|
||||
if not base_url:
|
||||
return []
|
||||
|
||||
client_kwargs: dict[str, Any] = {
|
||||
"timeout": settings.jenkins_api_timeout_sec,
|
||||
"follow_redirects": True,
|
||||
}
|
||||
auth = _jenkins_auth()
|
||||
if auth is not None:
|
||||
client_kwargs["auth"] = auth
|
||||
|
||||
with httpx.Client(**client_kwargs) as client:
|
||||
response = client.get(
|
||||
f"{base_url}/api/json",
|
||||
params={"tree": _JENKINS_JOBS_TREE},
|
||||
)
|
||||
response.raise_for_status()
|
||||
payload = response.json()
|
||||
|
||||
if not isinstance(payload, dict):
|
||||
raise ValueError("jenkins API returned a non-object payload")
|
||||
|
||||
items = payload.get("jobs") if isinstance(payload.get("jobs"), list) else []
|
||||
jobs: list[JenkinsBuildWeatherJob] = []
|
||||
for raw in _flatten_jobs(items):
|
||||
parsed = _parse_job(raw)
|
||||
if parsed is None:
|
||||
continue
|
||||
jobs.append(parsed)
|
||||
|
||||
jobs.sort(key=lambda row: row.last_run_ts, reverse=True)
|
||||
return jobs
|
||||
|
||||
|
||||
def _remove_missing_series(current_series: set[tuple[str, str, str]]) -> None:
|
||||
global _JOB_SERIES
|
||||
with _JOB_SERIES_LOCK:
|
||||
removed = _JOB_SERIES - current_series
|
||||
if removed:
|
||||
for labels in removed:
|
||||
for metric in _JOB_METRICS:
|
||||
try:
|
||||
metric.remove(*labels)
|
||||
except KeyError:
|
||||
pass
|
||||
_JOB_SERIES = set(current_series)
|
||||
|
||||
|
||||
def _record_jobs(jobs: list[JenkinsBuildWeatherJob]) -> JenkinsBuildWeatherSummary:
|
||||
counts = {
|
||||
"success": 0,
|
||||
"failure": 0,
|
||||
"running": 0,
|
||||
"unknown": 0,
|
||||
}
|
||||
series: set[tuple[str, str, str]] = set()
|
||||
|
||||
for job in jobs:
|
||||
series.add(job.series_key)
|
||||
counts[job.status] = counts.get(job.status, 0) + 1
|
||||
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_RUN_TS.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(job.last_run_ts)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_SUCCESS_TS.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(job.last_success_ts)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_FAILURE_TS.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(job.last_failure_ts)
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_DURATION_SECONDS.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(max(job.last_duration_seconds, 0.0))
|
||||
JENKINS_BUILD_WEATHER_JOB_LAST_STATUS.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(_STATUS_VALUES.get(job.status, _STATUS_VALUES["unknown"]))
|
||||
JENKINS_BUILD_WEATHER_JOB_HEALTH_SCORE.labels(
|
||||
job=job.job,
|
||||
job_url=job.job_url,
|
||||
weather_icon=job.weather_icon,
|
||||
).set(job.health_score)
|
||||
|
||||
_remove_missing_series(series)
|
||||
|
||||
for status in ("success", "failure", "running", "unknown"):
|
||||
JENKINS_BUILD_WEATHER_JOBS_TOTAL.labels(status=status).set(counts.get(status, 0))
|
||||
|
||||
return JenkinsBuildWeatherSummary(
|
||||
jobs_total=len(jobs),
|
||||
success_total=counts.get("success", 0),
|
||||
failure_total=counts.get("failure", 0),
|
||||
running_total=counts.get("running", 0),
|
||||
unknown_total=counts.get("unknown", 0),
|
||||
)
|
||||
|
||||
|
||||
def collect_jenkins_build_weather() -> JenkinsBuildWeatherSummary:
|
||||
"""Collect Jenkins homepage job weather/status into Prometheus gauges."""
|
||||
|
||||
now_ts = datetime.now(timezone.utc).timestamp()
|
||||
JENKINS_BUILD_WEATHER_LAST_RUN_TS.set(now_ts)
|
||||
|
||||
if not settings.jenkins_base_url.strip():
|
||||
JENKINS_BUILD_WEATHER_RUNS_TOTAL.labels(status="skipped").inc()
|
||||
summary = JenkinsBuildWeatherSummary(
|
||||
jobs_total=0,
|
||||
success_total=0,
|
||||
failure_total=0,
|
||||
running_total=0,
|
||||
unknown_total=0,
|
||||
)
|
||||
logger.info(
|
||||
"jenkins build weather skipped",
|
||||
extra={
|
||||
"event": "jenkins_build_weather",
|
||||
"status": "skipped",
|
||||
"detail": "jenkins base url is empty",
|
||||
},
|
||||
)
|
||||
return summary
|
||||
|
||||
try:
|
||||
jobs = _fetch_jobs()
|
||||
summary = _record_jobs(jobs)
|
||||
except Exception as exc:
|
||||
JENKINS_BUILD_WEATHER_RUNS_TOTAL.labels(status="error").inc()
|
||||
JENKINS_BUILD_WEATHER_LAST_FAILURE_TS.set(now_ts)
|
||||
logger.exception(
|
||||
"jenkins build weather collection failed",
|
||||
extra={
|
||||
"event": "jenkins_build_weather",
|
||||
"status": "error",
|
||||
"detail": str(exc),
|
||||
},
|
||||
)
|
||||
raise
|
||||
|
||||
JENKINS_BUILD_WEATHER_RUNS_TOTAL.labels(status="ok").inc()
|
||||
JENKINS_BUILD_WEATHER_LAST_SUCCESS_TS.set(now_ts)
|
||||
logger.info(
|
||||
"jenkins build weather collection finished",
|
||||
extra={
|
||||
"event": "jenkins_build_weather",
|
||||
"status": "ok",
|
||||
"jobs_total": summary.jobs_total,
|
||||
"success_total": summary.success_total,
|
||||
"failure_total": summary.failure_total,
|
||||
"running_total": summary.running_total,
|
||||
"unknown_total": summary.unknown_total,
|
||||
},
|
||||
)
|
||||
return summary
|
||||
261
ariadne/services/jenkins_workspace_candidates.py
Normal file
261
ariadne/services/jenkins_workspace_candidates.py
Normal file
@ -0,0 +1,261 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _CleanupCandidate:
|
||||
name: str
|
||||
kind: str
|
||||
path: str
|
||||
created_at: datetime | None
|
||||
related_pvc: str | None = None
|
||||
pv_name: str | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class _LonghornBinding:
|
||||
pvc_name: Any
|
||||
pvc_namespace: Any
|
||||
referenced_pv_name: Any
|
||||
|
||||
|
||||
def _parse_timestamp(raw: str) -> datetime | None:
|
||||
"""Parse Kubernetes RFC3339 timestamps into timezone-aware datetimes."""
|
||||
|
||||
normalized = raw.replace("Z", "+00:00")
|
||||
try:
|
||||
return datetime.fromisoformat(normalized)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _created_at(metadata: dict[str, Any]) -> datetime | None:
|
||||
raw = metadata.get("creationTimestamp")
|
||||
if not isinstance(raw, str) or not raw:
|
||||
return None
|
||||
return _parse_timestamp(raw)
|
||||
|
||||
|
||||
def _is_old_enough(settings_obj: Any, metadata: dict[str, Any]) -> bool:
|
||||
"""Return true when an object age exceeds the configured cleanup threshold."""
|
||||
|
||||
created_at = _created_at(metadata)
|
||||
if created_at is None:
|
||||
return False
|
||||
min_age = timedelta(hours=settings_obj.jenkins_workspace_cleanup_min_age_hours)
|
||||
return datetime.now(timezone.utc) - created_at >= min_age
|
||||
|
||||
|
||||
def _is_deleting(metadata: dict[str, Any]) -> bool:
|
||||
deletion_ts = metadata.get("deletionTimestamp")
|
||||
return isinstance(deletion_ts, str) and bool(deletion_ts.strip())
|
||||
|
||||
|
||||
def _is_workspace_name(settings_obj: Any, name: Any) -> bool:
|
||||
return isinstance(name, str) and name.startswith(settings_obj.jenkins_workspace_pvc_prefix)
|
||||
|
||||
|
||||
def _active_workspace_claims(settings_obj: Any, get_json_func: Callable[[str], dict[str, Any]]) -> set[str]:
|
||||
"""Collect currently referenced Jenkins workspace PVC names from pods."""
|
||||
|
||||
namespace = settings_obj.jenkins_workspace_namespace
|
||||
payload = get_json_func(f"/api/v1/namespaces/{namespace}/pods")
|
||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||
active: set[str] = set()
|
||||
for pod in items:
|
||||
if not isinstance(pod, dict):
|
||||
continue
|
||||
metadata = pod.get("metadata") if isinstance(pod.get("metadata"), dict) else {}
|
||||
annotations = metadata.get("annotations") if isinstance(metadata.get("annotations"), dict) else {}
|
||||
spec = pod.get("spec") if isinstance(pod.get("spec"), dict) else {}
|
||||
volumes = spec.get("volumes") if isinstance(spec.get("volumes"), list) else []
|
||||
for volume in volumes:
|
||||
if not isinstance(volume, dict):
|
||||
continue
|
||||
claim = volume.get("persistentVolumeClaim")
|
||||
if not isinstance(claim, dict):
|
||||
continue
|
||||
claim_name = claim.get("claimName")
|
||||
if _is_workspace_name(settings_obj, claim_name):
|
||||
active.add(claim_name)
|
||||
claim_name = annotations.get("jenkins.io/workspace-pvc")
|
||||
if _is_workspace_name(settings_obj, claim_name):
|
||||
active.add(claim_name)
|
||||
return active
|
||||
|
||||
|
||||
def _workspace_pv_candidates(settings_obj: Any, get_json_func: Callable[[str], dict[str, Any]], active_claims: set[str]) -> tuple[list[_CleanupCandidate], set[str]]:
|
||||
"""Find releasable Jenkins workspace PVs and keep a set of all PV names."""
|
||||
|
||||
namespace = settings_obj.jenkins_workspace_namespace
|
||||
payload = get_json_func("/api/v1/persistentvolumes")
|
||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||
candidates: list[_CleanupCandidate] = []
|
||||
all_pv_names: set[str] = set()
|
||||
|
||||
for pv in items:
|
||||
if not isinstance(pv, dict):
|
||||
continue
|
||||
metadata = pv.get("metadata") if isinstance(pv.get("metadata"), dict) else {}
|
||||
status = pv.get("status") if isinstance(pv.get("status"), dict) else {}
|
||||
spec = pv.get("spec") if isinstance(pv.get("spec"), dict) else {}
|
||||
name = metadata.get("name")
|
||||
if isinstance(name, str) and name:
|
||||
all_pv_names.add(name)
|
||||
|
||||
claim_ref = spec.get("claimRef") if isinstance(spec.get("claimRef"), dict) else {}
|
||||
claim_namespace = claim_ref.get("namespace")
|
||||
claim_name = claim_ref.get("name")
|
||||
phase = status.get("phase")
|
||||
if claim_namespace != namespace:
|
||||
continue
|
||||
if not _is_workspace_name(settings_obj, claim_name):
|
||||
continue
|
||||
if _is_deleting(metadata):
|
||||
continue
|
||||
if claim_name in active_claims:
|
||||
continue
|
||||
if phase not in {"Released", "Failed"}:
|
||||
continue
|
||||
if not _is_old_enough(settings_obj, metadata):
|
||||
continue
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
candidates.append(
|
||||
_CleanupCandidate(
|
||||
name=name,
|
||||
kind="pv",
|
||||
path=f"/api/v1/persistentvolumes/{name}",
|
||||
created_at=_created_at(metadata),
|
||||
related_pvc=claim_name if isinstance(claim_name, str) else None,
|
||||
)
|
||||
)
|
||||
return candidates, all_pv_names
|
||||
|
||||
|
||||
def _workspace_pvc_candidates(settings_obj: Any, get_json_func: Callable[[str], dict[str, Any]], active_claims: set[str]) -> list[_CleanupCandidate]:
|
||||
"""Find stale Jenkins workspace PVCs that are not actively referenced."""
|
||||
|
||||
namespace = settings_obj.jenkins_workspace_namespace
|
||||
payload = get_json_func(f"/api/v1/namespaces/{namespace}/persistentvolumeclaims")
|
||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||
candidates: list[_CleanupCandidate] = []
|
||||
|
||||
for pvc in items:
|
||||
if not isinstance(pvc, dict):
|
||||
continue
|
||||
metadata = pvc.get("metadata") if isinstance(pvc.get("metadata"), dict) else {}
|
||||
status = pvc.get("status") if isinstance(pvc.get("status"), dict) else {}
|
||||
claim_name = metadata.get("name")
|
||||
phase = status.get("phase")
|
||||
if not _is_workspace_name(settings_obj, claim_name):
|
||||
continue
|
||||
if _is_deleting(metadata):
|
||||
continue
|
||||
if claim_name in active_claims:
|
||||
continue
|
||||
if phase == "Bound":
|
||||
continue
|
||||
if not _is_old_enough(settings_obj, metadata):
|
||||
continue
|
||||
if not isinstance(claim_name, str) or not claim_name:
|
||||
continue
|
||||
candidates.append(
|
||||
_CleanupCandidate(
|
||||
name=claim_name,
|
||||
kind="pvc",
|
||||
path=f"/api/v1/namespaces/{namespace}/persistentvolumeclaims/{claim_name}",
|
||||
created_at=_created_at(metadata),
|
||||
)
|
||||
)
|
||||
return candidates
|
||||
|
||||
|
||||
def _workspace_binding_from_longhorn(metadata: dict[str, Any], status: dict[str, Any]) -> _LonghornBinding:
|
||||
labels = metadata.get("labels") if isinstance(metadata.get("labels"), dict) else {}
|
||||
kubernetes_status = status.get("kubernetesStatus") if isinstance(status.get("kubernetesStatus"), dict) else {}
|
||||
pvc_name = labels.get("kubernetes.io/created-for/pvc/name")
|
||||
if not isinstance(pvc_name, str) or not pvc_name:
|
||||
pvc_name = kubernetes_status.get("pvcName")
|
||||
pvc_namespace = labels.get("kubernetes.io/created-for/pvc/namespace")
|
||||
if not isinstance(pvc_namespace, str) or not pvc_namespace:
|
||||
pvc_namespace = kubernetes_status.get("namespace")
|
||||
referenced_pv_name = kubernetes_status.get("pvName")
|
||||
return _LonghornBinding(
|
||||
pvc_name=pvc_name,
|
||||
pvc_namespace=pvc_namespace,
|
||||
referenced_pv_name=referenced_pv_name,
|
||||
)
|
||||
|
||||
|
||||
def _should_delete_longhorn_volume(settings_obj: Any, name: str, binding: _LonghornBinding, all_pv_names: set[str], removed_pv_names: set[str]) -> bool:
|
||||
if name in removed_pv_names or binding.referenced_pv_name in removed_pv_names:
|
||||
return True
|
||||
if not _is_workspace_name(settings_obj, binding.pvc_name):
|
||||
return False
|
||||
if (
|
||||
isinstance(binding.referenced_pv_name, str)
|
||||
and binding.referenced_pv_name in all_pv_names
|
||||
) or name in all_pv_names:
|
||||
return False
|
||||
return (
|
||||
binding.pvc_namespace in {None, ""}
|
||||
or binding.pvc_namespace == settings_obj.jenkins_workspace_namespace
|
||||
)
|
||||
|
||||
|
||||
def _workspace_longhorn_candidates(settings_obj: Any, get_json_func: Callable[[str], dict[str, Any]], all_pv_names: set[str], removed_pv_names: set[str]) -> list[_CleanupCandidate]:
|
||||
namespace = "longhorn-system"
|
||||
payload = get_json_func("/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes")
|
||||
items = payload.get("items") if isinstance(payload.get("items"), list) else []
|
||||
candidates: list[_CleanupCandidate] = []
|
||||
|
||||
for volume in items:
|
||||
if not isinstance(volume, dict):
|
||||
continue
|
||||
metadata = volume.get("metadata") if isinstance(volume.get("metadata"), dict) else {}
|
||||
status = volume.get("status") if isinstance(volume.get("status"), dict) else {}
|
||||
spec = volume.get("spec") if isinstance(volume.get("spec"), dict) else {}
|
||||
name = metadata.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
continue
|
||||
|
||||
binding = _workspace_binding_from_longhorn(metadata, status)
|
||||
robust_state = status.get("robustness")
|
||||
state = status.get("state")
|
||||
attached = status.get("isAttached")
|
||||
frontend = spec.get("frontend")
|
||||
if not _should_delete_longhorn_volume(
|
||||
settings_obj,
|
||||
name,
|
||||
binding,
|
||||
all_pv_names,
|
||||
removed_pv_names,
|
||||
):
|
||||
continue
|
||||
if _is_deleting(metadata):
|
||||
continue
|
||||
if not _is_old_enough(settings_obj, metadata):
|
||||
continue
|
||||
if state not in {None, "detached", "faulted", "unknown"}:
|
||||
continue
|
||||
if attached is True:
|
||||
continue
|
||||
if robust_state not in {None, "unknown", "faulted", "degraded"}:
|
||||
continue
|
||||
if frontend not in {None, "", "blockdev"}:
|
||||
continue
|
||||
candidates.append(
|
||||
_CleanupCandidate(
|
||||
name=name,
|
||||
kind="longhorn_volume",
|
||||
path=f"/apis/longhorn.io/v1beta2/namespaces/{namespace}/volumes/{name}",
|
||||
created_at=_created_at(metadata),
|
||||
pv_name=name,
|
||||
)
|
||||
)
|
||||
return candidates
|
||||
359
ariadne/services/jenkins_workspace_cleanup.py
Normal file
359
ariadne/services/jenkins_workspace_cleanup.py
Normal file
@ -0,0 +1,359 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from prometheus_client import Counter, Gauge
|
||||
|
||||
from ..k8s.client import delete_json, get_json
|
||||
from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
from .jenkins_workspace_candidates import (
|
||||
_CleanupCandidate,
|
||||
_active_workspace_claims,
|
||||
_workspace_longhorn_candidates,
|
||||
_workspace_pv_candidates,
|
||||
_workspace_pvc_candidates,
|
||||
)
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
JENKINS_WORKSPACE_CLEANUP_RUNS_TOTAL = Counter(
|
||||
"ariadne_jenkins_workspace_cleanup_runs_total",
|
||||
"Jenkins workspace cleanup runs by status and mode",
|
||||
["status", "mode"],
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL = Counter(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
"Jenkins workspace cleanup objects by kind, action, and mode",
|
||||
["kind", "action", "mode"],
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_RUN_TS = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_run_timestamp_seconds",
|
||||
"Last Jenkins workspace cleanup run timestamp",
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_SUCCESS_TS = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_success_timestamp_seconds",
|
||||
"Last successful Jenkins workspace cleanup timestamp",
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURE_TS = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_failure_timestamp_seconds",
|
||||
"Last failed Jenkins workspace cleanup timestamp",
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_DELETED = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_deleted_total",
|
||||
"Last Jenkins workspace cleanup deleted object count",
|
||||
["kind"],
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_planned_total",
|
||||
"Last Jenkins workspace cleanup planned object count",
|
||||
["kind"],
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_SKIPPED = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_skipped_total",
|
||||
"Last Jenkins workspace cleanup skipped object count",
|
||||
)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURES = Gauge(
|
||||
"ariadne_jenkins_workspace_cleanup_last_failures_total",
|
||||
"Last Jenkins workspace cleanup failure count",
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class JenkinsWorkspaceCleanupSummary:
|
||||
"""Summarize one Jenkins workspace-storage cleanup pass.
|
||||
|
||||
Inputs: Kubernetes PV/PVC/Longhorn objects fetched from the API server.
|
||||
Outputs: deterministic counters for operator logs and metrics.
|
||||
"""
|
||||
|
||||
pvs_planned: int
|
||||
pvcs_planned: int
|
||||
volumes_planned: int
|
||||
pvs_deleted: int
|
||||
pvcs_deleted: int
|
||||
volumes_deleted: int
|
||||
skipped: int
|
||||
failures: int
|
||||
dry_run: bool
|
||||
|
||||
@property
|
||||
def planned(self) -> int:
|
||||
return self.pvs_planned + self.pvcs_planned + self.volumes_planned
|
||||
|
||||
@property
|
||||
def deleted(self) -> int:
|
||||
return self.pvs_deleted + self.pvcs_deleted + self.volumes_deleted
|
||||
|
||||
|
||||
def _validate_cleanup_settings() -> tuple[str, str, bool, int]:
|
||||
namespace = settings.jenkins_workspace_namespace
|
||||
prefix = settings.jenkins_workspace_pvc_prefix.strip()
|
||||
dry_run = settings.jenkins_workspace_cleanup_dry_run
|
||||
max_deletions = settings.jenkins_workspace_cleanup_max_deletions_per_run
|
||||
if not namespace.strip():
|
||||
raise ValueError("jenkins workspace cleanup namespace is empty")
|
||||
if not prefix:
|
||||
raise ValueError("jenkins workspace cleanup pvc prefix is empty")
|
||||
if settings.jenkins_workspace_cleanup_min_age_hours < 1.0:
|
||||
raise ValueError("jenkins workspace cleanup min age must be >= 1 hour")
|
||||
if max_deletions < 1:
|
||||
raise ValueError("jenkins workspace cleanup max deletions must be >= 1")
|
||||
return namespace, prefix, dry_run, max_deletions
|
||||
|
||||
|
||||
def _planned_removed_pv_names_dry_run(stale_pvcs: list[_CleanupCandidate], stale_pvs: list[_CleanupCandidate], max_deletions: int) -> set[str]:
|
||||
remaining = max(max_deletions - len(stale_pvcs), 0)
|
||||
if remaining == 0:
|
||||
return set()
|
||||
names = [candidate.name for candidate in stale_pvs if candidate.name]
|
||||
return set(names[:remaining])
|
||||
|
||||
|
||||
def _delete_candidates(candidates: list[_CleanupCandidate], *, deletion_budget: int | None, failure_log: str, failure_field: str, removed_pv_names: set[str] | None = None) -> tuple[int, int, int, int | None]:
|
||||
deleted = 0
|
||||
skipped = 0
|
||||
failures = 0
|
||||
budget = deletion_budget
|
||||
for candidate in candidates:
|
||||
if not candidate.name:
|
||||
skipped += 1
|
||||
continue
|
||||
if budget is not None and budget <= 0:
|
||||
skipped += 1
|
||||
continue
|
||||
if budget is not None:
|
||||
budget -= 1
|
||||
try:
|
||||
delete_json(candidate.path)
|
||||
deleted += 1
|
||||
if removed_pv_names is not None:
|
||||
removed_pv_names.add(candidate.name)
|
||||
except Exception as exc:
|
||||
failures += 1
|
||||
logger.info(
|
||||
failure_log,
|
||||
extra={"event": "jenkins_workspace_cleanup", failure_field: candidate.name, "detail": str(exc)},
|
||||
)
|
||||
return deleted, skipped, failures, budget
|
||||
|
||||
|
||||
def _record_guard_cap(*, max_deletions: int, stale_pvcs: list[_CleanupCandidate], stale_pvs: list[_CleanupCandidate], stale_volumes: list[_CleanupCandidate], dry_run: bool) -> None:
|
||||
planned_total = len(stale_pvcs) + len(stale_pvs) + len(stale_volumes)
|
||||
if planned_total <= max_deletions:
|
||||
return
|
||||
logger.warning(
|
||||
"jenkins workspace cleanup capped by max deletions guard",
|
||||
extra={
|
||||
"event": "jenkins_workspace_cleanup",
|
||||
"status": "guard_capped",
|
||||
"namespace": settings.jenkins_workspace_namespace,
|
||||
"dry_run": dry_run,
|
||||
"planned_total": planned_total,
|
||||
"max_deletions": max_deletions,
|
||||
"planned_pvs": len(stale_pvs),
|
||||
"planned_pvcs": len(stale_pvcs),
|
||||
"planned_volumes": len(stale_volumes),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _dry_run_summary(*, namespace: str, max_deletions: int, stale_pvcs: list[_CleanupCandidate], stale_pvs: list[_CleanupCandidate], all_pv_names: set[str]) -> JenkinsWorkspaceCleanupSummary:
|
||||
simulated_removed = _planned_removed_pv_names_dry_run(stale_pvcs, stale_pvs, max_deletions)
|
||||
stale_volumes = _workspace_longhorn_candidates(settings, get_json, all_pv_names, simulated_removed)
|
||||
_record_guard_cap(
|
||||
max_deletions=max_deletions,
|
||||
stale_pvcs=stale_pvcs,
|
||||
stale_pvs=stale_pvs,
|
||||
stale_volumes=stale_volumes,
|
||||
dry_run=True,
|
||||
)
|
||||
logger.info(
|
||||
"jenkins workspace cleanup dry-run enabled",
|
||||
extra={
|
||||
"event": "jenkins_workspace_cleanup",
|
||||
"status": "dry_run",
|
||||
"namespace": namespace,
|
||||
"dry_run": True,
|
||||
"planned_pvs": len(stale_pvs),
|
||||
"planned_pvcs": len(stale_pvcs),
|
||||
"planned_volumes": len(stale_volumes),
|
||||
"max_deletions": max_deletions,
|
||||
},
|
||||
)
|
||||
return JenkinsWorkspaceCleanupSummary(
|
||||
pvs_planned=len(stale_pvs),
|
||||
pvcs_planned=len(stale_pvcs),
|
||||
volumes_planned=len(stale_volumes),
|
||||
pvs_deleted=0,
|
||||
pvcs_deleted=0,
|
||||
volumes_deleted=0,
|
||||
skipped=0,
|
||||
failures=0,
|
||||
dry_run=True,
|
||||
)
|
||||
|
||||
|
||||
def _delete_run_summary(*, namespace: str, max_deletions: int, stale_pvcs: list[_CleanupCandidate], stale_pvs: list[_CleanupCandidate], all_pv_names: set[str]) -> JenkinsWorkspaceCleanupSummary:
|
||||
removed_pv_names: set[str] = set()
|
||||
deletion_budget: int | None = max_deletions
|
||||
pvcs_deleted, pvc_skipped, pvc_failures, deletion_budget = _delete_candidates(
|
||||
stale_pvcs,
|
||||
deletion_budget=deletion_budget,
|
||||
failure_log="jenkins workspace pvc delete failed",
|
||||
failure_field="claim",
|
||||
)
|
||||
pvs_deleted, pv_skipped, pv_failures, deletion_budget = _delete_candidates(
|
||||
stale_pvs,
|
||||
deletion_budget=deletion_budget,
|
||||
failure_log="jenkins workspace pv delete failed",
|
||||
failure_field="pv",
|
||||
removed_pv_names=removed_pv_names,
|
||||
)
|
||||
stale_volumes = _workspace_longhorn_candidates(settings, get_json, all_pv_names, removed_pv_names)
|
||||
_record_guard_cap(
|
||||
max_deletions=max_deletions,
|
||||
stale_pvcs=stale_pvcs,
|
||||
stale_pvs=stale_pvs,
|
||||
stale_volumes=stale_volumes,
|
||||
dry_run=False,
|
||||
)
|
||||
volumes_deleted, volume_skipped, volume_failures, _ = _delete_candidates(
|
||||
stale_volumes,
|
||||
deletion_budget=deletion_budget,
|
||||
failure_log="jenkins workspace longhorn volume delete failed",
|
||||
failure_field="volume",
|
||||
)
|
||||
return JenkinsWorkspaceCleanupSummary(
|
||||
pvs_planned=len(stale_pvs),
|
||||
pvcs_planned=len(stale_pvcs),
|
||||
volumes_planned=len(stale_volumes),
|
||||
pvs_deleted=pvs_deleted,
|
||||
pvcs_deleted=pvcs_deleted,
|
||||
volumes_deleted=volumes_deleted,
|
||||
skipped=pvc_skipped + pv_skipped + volume_skipped,
|
||||
failures=pvc_failures + pv_failures + volume_failures,
|
||||
dry_run=False,
|
||||
)
|
||||
|
||||
|
||||
def _record_metrics(summary: JenkinsWorkspaceCleanupSummary) -> None:
|
||||
mode = "dry_run" if summary.dry_run else "delete"
|
||||
status = "ok" if summary.failures == 0 else "error"
|
||||
JENKINS_WORKSPACE_CLEANUP_RUNS_TOTAL.labels(status=status, mode=mode).inc()
|
||||
if summary.failures:
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURE_TS.set(datetime.now(timezone.utc).timestamp())
|
||||
else:
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_SUCCESS_TS.set(datetime.now(timezone.utc).timestamp())
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_RUN_TS.set(datetime.now(timezone.utc).timestamp())
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_DELETED.labels(kind="pvc").set(summary.pvcs_deleted)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_DELETED.labels(kind="pv").set(summary.pvs_deleted)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_DELETED.labels(kind="longhorn_volume").set(summary.volumes_deleted)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED.labels(kind="pvc").set(summary.pvcs_planned)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED.labels(kind="pv").set(summary.pvs_planned)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_PLANNED.labels(kind="longhorn_volume").set(summary.volumes_planned)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_SKIPPED.set(summary.skipped)
|
||||
JENKINS_WORKSPACE_CLEANUP_LAST_FAILURES.set(summary.failures)
|
||||
for kind, planned, deleted in (
|
||||
("pvc", summary.pvcs_planned, summary.pvcs_deleted),
|
||||
("pv", summary.pvs_planned, summary.pvs_deleted),
|
||||
("longhorn_volume", summary.volumes_planned, summary.volumes_deleted),
|
||||
):
|
||||
if planned:
|
||||
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(kind=kind, action="planned", mode=mode).inc(planned)
|
||||
if deleted:
|
||||
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(kind=kind, action="deleted", mode=mode).inc(deleted)
|
||||
if summary.skipped:
|
||||
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(
|
||||
kind="cleanup",
|
||||
action="skipped",
|
||||
mode=mode,
|
||||
).inc(summary.skipped)
|
||||
if summary.failures:
|
||||
JENKINS_WORKSPACE_CLEANUP_OBJECTS_TOTAL.labels(
|
||||
kind="cleanup",
|
||||
action="failed",
|
||||
mode=mode,
|
||||
).inc(summary.failures)
|
||||
|
||||
|
||||
def cleanup_jenkins_workspace_storage() -> JenkinsWorkspaceCleanupSummary:
|
||||
"""Delete stale Jenkins workspace PVC/PV artifacts and orphan Longhorn volumes."""
|
||||
|
||||
summary = JenkinsWorkspaceCleanupSummary(
|
||||
pvs_planned=0,
|
||||
pvcs_planned=0,
|
||||
volumes_planned=0,
|
||||
pvs_deleted=0,
|
||||
pvcs_deleted=0,
|
||||
volumes_deleted=0,
|
||||
skipped=0,
|
||||
failures=0,
|
||||
dry_run=settings.jenkins_workspace_cleanup_dry_run,
|
||||
)
|
||||
try:
|
||||
namespace, _prefix, dry_run, max_deletions = _validate_cleanup_settings()
|
||||
active_claims = _active_workspace_claims(settings, get_json)
|
||||
stale_pvs, all_pv_names = _workspace_pv_candidates(settings, get_json, active_claims)
|
||||
stale_pvcs = _workspace_pvc_candidates(settings, get_json, active_claims)
|
||||
if dry_run:
|
||||
summary = _dry_run_summary(
|
||||
namespace=namespace,
|
||||
max_deletions=max_deletions,
|
||||
stale_pvcs=stale_pvcs,
|
||||
stale_pvs=stale_pvs,
|
||||
all_pv_names=all_pv_names,
|
||||
)
|
||||
else:
|
||||
summary = _delete_run_summary(
|
||||
namespace=namespace,
|
||||
max_deletions=max_deletions,
|
||||
stale_pvcs=stale_pvcs,
|
||||
stale_pvs=stale_pvs,
|
||||
all_pv_names=all_pv_names,
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.exception(
|
||||
"jenkins workspace cleanup failed",
|
||||
extra={
|
||||
"event": "jenkins_workspace_cleanup",
|
||||
"status": "error",
|
||||
"namespace": settings.jenkins_workspace_namespace,
|
||||
"detail": str(exc),
|
||||
},
|
||||
)
|
||||
summary = JenkinsWorkspaceCleanupSummary(
|
||||
pvs_planned=summary.pvs_planned,
|
||||
pvcs_planned=summary.pvcs_planned,
|
||||
volumes_planned=summary.volumes_planned,
|
||||
pvs_deleted=summary.pvs_deleted,
|
||||
pvcs_deleted=summary.pvcs_deleted,
|
||||
volumes_deleted=summary.volumes_deleted,
|
||||
skipped=summary.skipped,
|
||||
failures=summary.failures + 1,
|
||||
dry_run=summary.dry_run,
|
||||
)
|
||||
_record_metrics(summary)
|
||||
raise
|
||||
_record_metrics(summary)
|
||||
logger.info(
|
||||
"jenkins workspace cleanup finished",
|
||||
extra={
|
||||
"event": "jenkins_workspace_cleanup",
|
||||
"status": "ok" if summary.failures == 0 else "error",
|
||||
"dry_run": summary.dry_run,
|
||||
"namespace": namespace,
|
||||
"planned_pvs": summary.pvs_planned,
|
||||
"planned_pvcs": summary.pvcs_planned,
|
||||
"planned_volumes": summary.volumes_planned,
|
||||
"deleted_pvs": summary.pvs_deleted,
|
||||
"deleted_pvcs": summary.pvcs_deleted,
|
||||
"deleted_volumes": summary.volumes_deleted,
|
||||
"skipped": summary.skipped,
|
||||
"failures": summary.failures,
|
||||
},
|
||||
)
|
||||
return summary
|
||||
@ -9,6 +9,8 @@ from ..settings import settings
|
||||
|
||||
|
||||
class KeycloakAdminClient:
|
||||
"""Call the Keycloak admin API for user, group, and attribute updates."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._token: str = ""
|
||||
self._expires_at: float = 0.0
|
||||
|
||||
@ -29,6 +29,8 @@ def _profile_complete(user: dict[str, Any]) -> bool:
|
||||
|
||||
|
||||
def run_profile_sync() -> ProfileSyncSummary:
|
||||
"""Clear completed Keycloak profile actions once required fields exist."""
|
||||
|
||||
if not keycloak_admin.ready():
|
||||
summary = ProfileSyncSummary(0, 0, 0, 1, detail="keycloak admin not configured")
|
||||
logger.info(
|
||||
|
||||
@ -19,6 +19,8 @@ class SentEmail:
|
||||
|
||||
|
||||
class Mailer:
|
||||
"""Send onboarding and notification email through configured SMTP."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._host = settings.smtp_host
|
||||
self._port = settings.smtp_port
|
||||
|
||||
@ -115,6 +115,8 @@ def _password_too_long(password: str) -> bool:
|
||||
|
||||
|
||||
class MailuService:
|
||||
"""Synchronize Keycloak user mail settings into Mailu storage."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._db_config = {
|
||||
"host": settings.mailu_db_host,
|
||||
@ -136,11 +138,7 @@ class MailuService:
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def resolve_mailu_email(
|
||||
username: str,
|
||||
attributes: dict[str, Any] | None,
|
||||
fallback_email: str = "",
|
||||
) -> str:
|
||||
def resolve_mailu_email(username: str, attributes: dict[str, Any] | None, fallback_email: str = "") -> str:
|
||||
attrs = attributes or {}
|
||||
explicit = _extract_attr(attrs, MAILU_EMAIL_ATTR)
|
||||
if explicit:
|
||||
@ -180,12 +178,7 @@ class MailuService:
|
||||
},
|
||||
)
|
||||
|
||||
def _prepare_updates(
|
||||
self,
|
||||
username: str,
|
||||
attrs: dict[str, Any],
|
||||
mailu_email: str,
|
||||
) -> tuple[bool, dict[str, list[str]], str]:
|
||||
def _prepare_updates(self, username: str, attrs: dict[str, Any], mailu_email: str) -> tuple[bool, dict[str, list[str]], str]:
|
||||
updates: dict[str, list[str]] = {}
|
||||
if not _extract_attr(attrs, MAILU_EMAIL_ATTR):
|
||||
updates[MAILU_EMAIL_ATTR] = [mailu_email]
|
||||
@ -226,10 +219,7 @@ class MailuService:
|
||||
return True
|
||||
return self._is_service_account(user, username)
|
||||
|
||||
def _build_sync_context(
|
||||
self,
|
||||
user: dict[str, Any],
|
||||
) -> tuple[MailuSyncContext | None, MailuUserSyncResult | None]:
|
||||
def _build_sync_context(self, user: dict[str, Any]) -> tuple[MailuSyncContext | None, MailuUserSyncResult | None]:
|
||||
username = self._username(user)
|
||||
if self._should_skip_user(user, username):
|
||||
return None, MailuUserSyncResult(skipped=1)
|
||||
@ -268,11 +258,7 @@ class MailuService:
|
||||
None,
|
||||
)
|
||||
|
||||
def _ensure_mailbox_with_retry(
|
||||
self,
|
||||
conn: psycopg.Connection,
|
||||
ctx: MailuSyncContext,
|
||||
) -> tuple[bool, bool, bool]:
|
||||
def _ensure_mailbox_with_retry(self, conn: psycopg.Connection, ctx: MailuSyncContext) -> tuple[bool, bool, bool]:
|
||||
mailbox_ok = False
|
||||
rotated = False
|
||||
failed = False
|
||||
@ -303,12 +289,7 @@ class MailuService:
|
||||
return mailbox_ok, failed, rotated
|
||||
|
||||
@staticmethod
|
||||
def _build_sync_result(
|
||||
updated: int,
|
||||
mailbox_ok: bool,
|
||||
failed: bool,
|
||||
rotated: bool,
|
||||
) -> MailuUserSyncResult:
|
||||
def _build_sync_result(updated: int, mailbox_ok: bool, failed: bool, rotated: bool) -> MailuUserSyncResult:
|
||||
if failed:
|
||||
return MailuUserSyncResult(failures=1, updated=updated)
|
||||
if mailbox_ok:
|
||||
@ -324,13 +305,7 @@ class MailuService:
|
||||
mailbox_ok, failed, rotated = self._ensure_mailbox_with_retry(conn, ctx)
|
||||
return self._build_sync_result(ctx.updated, mailbox_ok, failed, rotated)
|
||||
|
||||
def _ensure_mailbox(
|
||||
self,
|
||||
conn: psycopg.Connection,
|
||||
email: str,
|
||||
password: str,
|
||||
display_name: str,
|
||||
) -> bool:
|
||||
def _ensure_mailbox(self, conn: psycopg.Connection, email: str, password: str, display_name: str) -> bool:
|
||||
email = (email or "").strip()
|
||||
if not email or "@" not in email:
|
||||
return False
|
||||
|
||||
@ -54,13 +54,9 @@ def _event_context(payload: dict[str, Any] | None) -> dict[str, Any]:
|
||||
|
||||
|
||||
class MailuEventRunner:
|
||||
def __init__(
|
||||
self,
|
||||
min_interval_sec: float,
|
||||
wait_timeout_sec: float,
|
||||
runner: Callable[[str, bool], tuple[str, str]] | None = None,
|
||||
thread_factory: Callable[..., threading.Thread] = threading.Thread,
|
||||
) -> None:
|
||||
"""Debounce Keycloak events into Mailu synchronization runs."""
|
||||
|
||||
def __init__(self, min_interval_sec: float, wait_timeout_sec: float, runner: Callable[[str, bool], tuple[str, str]] | None = None, thread_factory: Callable[..., threading.Thread] = threading.Thread) -> None:
|
||||
self._min_interval_sec = min_interval_sec
|
||||
self._wait_timeout_sec = wait_timeout_sec
|
||||
self._runner = runner or self._default_runner
|
||||
|
||||
@ -39,6 +39,8 @@ def _normalize_payload(payload: Any) -> dict[str, Any]:
|
||||
|
||||
|
||||
class MetisService:
|
||||
"""Trigger Metis sentinel watch runs and normalize their response."""
|
||||
|
||||
def ready(self) -> bool:
|
||||
return bool(_watch_url())
|
||||
|
||||
|
||||
@ -1,8 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import re
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
@ -15,97 +13,20 @@ from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
from ..utils.passwords import random_password
|
||||
from .keycloak_admin import keycloak_admin
|
||||
from .nextcloud_maintenance import run_maintenance as run_nextcloud_maintenance
|
||||
from .nextcloud_mail_models import MailSyncCounters
|
||||
from .nextcloud_mail_models import display_name as _display_name
|
||||
from .nextcloud_mail_models import _extract_attr
|
||||
from .nextcloud_mail_models import _parse_mail_export
|
||||
from .nextcloud_mail_models import _resolve_mailu_email
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
||||
def _extract_attr(attrs: Any, key: str) -> str:
|
||||
if not isinstance(attrs, dict):
|
||||
return ""
|
||||
raw = attrs.get(key)
|
||||
if isinstance(raw, list):
|
||||
for item in raw:
|
||||
if isinstance(item, str) and item.strip():
|
||||
return item.strip()
|
||||
return ""
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return raw.strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _resolve_mailu_email(username: str, user: dict[str, Any]) -> str:
|
||||
attrs = user.get("attributes")
|
||||
mailu_email = _extract_attr(attrs, "mailu_email")
|
||||
if mailu_email:
|
||||
return mailu_email
|
||||
email = user.get("email")
|
||||
if isinstance(email, str) and email.strip():
|
||||
email = email.strip()
|
||||
if email.lower().endswith(f"@{settings.mailu_domain.lower()}"):
|
||||
return email
|
||||
return f"{username}@{settings.mailu_domain}"
|
||||
|
||||
|
||||
def _parse_mail_export(output: str) -> list[tuple[str, str]]:
|
||||
accounts: list[tuple[str, str]] = []
|
||||
account_id = ""
|
||||
for line in output.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
match = re.match(r"^Account\s+(\d+):", line, flags=re.IGNORECASE)
|
||||
if match:
|
||||
account_id = match.group(1)
|
||||
continue
|
||||
match = re.match(r"^-\s*E-?mail:\s*(\S+)", line, flags=re.IGNORECASE)
|
||||
if match and account_id:
|
||||
accounts.append((account_id, match.group(1)))
|
||||
return accounts
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NextcloudMailSyncSummary:
|
||||
processed: int
|
||||
created: int
|
||||
updated: int
|
||||
deleted: int
|
||||
skipped: int
|
||||
failures: int
|
||||
detail: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class MailSyncCounters:
|
||||
processed: int = 0
|
||||
created: int = 0
|
||||
updated: int = 0
|
||||
deleted: int = 0
|
||||
skipped: int = 0
|
||||
failures: int = 0
|
||||
last_error: str = ""
|
||||
|
||||
def summary(self) -> NextcloudMailSyncSummary:
|
||||
return NextcloudMailSyncSummary(
|
||||
processed=self.processed,
|
||||
created=self.created,
|
||||
updated=self.updated,
|
||||
deleted=self.deleted,
|
||||
skipped=self.skipped,
|
||||
failures=self.failures,
|
||||
detail=self.last_error,
|
||||
)
|
||||
|
||||
def status(self) -> str:
|
||||
return "ok" if self.failures == 0 else "error"
|
||||
|
||||
def record_failure(self, detail: str) -> None:
|
||||
self.failures += 1
|
||||
if detail and not self.last_error:
|
||||
self.last_error = detail
|
||||
|
||||
|
||||
class NextcloudService:
|
||||
"""Synchronize user mail configuration inside the Nextcloud pod."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._executor = PodExecutor(
|
||||
settings.nextcloud_namespace,
|
||||
@ -113,13 +34,7 @@ class NextcloudService:
|
||||
settings.nextcloud_container,
|
||||
)
|
||||
|
||||
def _exec_with_fallback(
|
||||
self,
|
||||
primary: list[str],
|
||||
fallback: list[str],
|
||||
env: dict[str, str] | None = None,
|
||||
check: bool = True,
|
||||
) -> ExecResult:
|
||||
def _exec_with_fallback(self, primary: list[str], fallback: list[str], env: dict[str, str] | None = None, check: bool = True) -> ExecResult:
|
||||
try:
|
||||
result = self._executor.exec(
|
||||
primary,
|
||||
@ -146,12 +61,7 @@ class NextcloudService:
|
||||
)
|
||||
return result
|
||||
|
||||
def _occ_exec(
|
||||
self,
|
||||
args: list[str],
|
||||
env: dict[str, str] | None = None,
|
||||
check: bool = True,
|
||||
) -> ExecResult:
|
||||
def _occ_exec(self, args: list[str], env: dict[str, str] | None = None, check: bool = True) -> ExecResult:
|
||||
command = ["runuser", "-u", "www-data", "--", "php", "/var/www/html/occ", *args]
|
||||
fallback = ["php", "/var/www/html/occ", *args]
|
||||
return self._exec_with_fallback(command, fallback, env=env, check=check)
|
||||
@ -160,21 +70,7 @@ class NextcloudService:
|
||||
result = self._occ_exec(args, check=True)
|
||||
return result.stdout
|
||||
|
||||
def _display_name(self, user: dict[str, Any]) -> str:
|
||||
first = user.get("firstName") if isinstance(user.get("firstName"), str) else ""
|
||||
last = user.get("lastName") if isinstance(user.get("lastName"), str) else ""
|
||||
first = first.strip()
|
||||
last = last.strip()
|
||||
if first and last:
|
||||
return f"{first} {last}"
|
||||
return last or first
|
||||
|
||||
def _ensure_nextcloud_user(
|
||||
self,
|
||||
username: str,
|
||||
mailu_email: str,
|
||||
display_name: str,
|
||||
) -> None:
|
||||
def _ensure_nextcloud_user(self, username: str, mailu_email: str, display_name: str) -> None:
|
||||
result = self._occ_exec(["user:info", username], check=False)
|
||||
if result.ok:
|
||||
return
|
||||
@ -279,11 +175,7 @@ class NextcloudService:
|
||||
full_user = user
|
||||
return username_val, user_id, full_user
|
||||
|
||||
def _list_mail_accounts_safe(
|
||||
self,
|
||||
username: str,
|
||||
counters: MailSyncCounters,
|
||||
) -> list[tuple[str, str]] | None:
|
||||
def _list_mail_accounts_safe(self, username: str, counters: MailSyncCounters) -> list[tuple[str, str]] | None:
|
||||
try:
|
||||
return self._list_mail_accounts(username)
|
||||
except Exception as exc:
|
||||
@ -295,11 +187,7 @@ class NextcloudService:
|
||||
)
|
||||
return None
|
||||
|
||||
def _select_primary_account(
|
||||
self,
|
||||
mailu_accounts: list[tuple[str, str]],
|
||||
mailu_email: str,
|
||||
) -> tuple[str, str]:
|
||||
def _select_primary_account(self, mailu_accounts: list[tuple[str, str]], mailu_email: str) -> tuple[str, str]:
|
||||
primary_id = ""
|
||||
primary_email = ""
|
||||
for account_id, account_email in mailu_accounts:
|
||||
@ -312,13 +200,7 @@ class NextcloudService:
|
||||
break
|
||||
return primary_id, primary_email
|
||||
|
||||
def _update_mail_account(
|
||||
self,
|
||||
username: str,
|
||||
primary_id: str,
|
||||
mailu_email: str,
|
||||
app_pw: str,
|
||||
) -> str | None:
|
||||
def _update_mail_account(self, username: str, primary_id: str, mailu_email: str, app_pw: str) -> str | None:
|
||||
try:
|
||||
self._occ(
|
||||
[
|
||||
@ -383,12 +265,7 @@ class NextcloudService:
|
||||
except Exception as exc:
|
||||
return str(exc)
|
||||
|
||||
def _delete_extra_accounts(
|
||||
self,
|
||||
mailu_accounts: list[tuple[str, str]],
|
||||
primary_id: str,
|
||||
counters: MailSyncCounters,
|
||||
) -> int:
|
||||
def _delete_extra_accounts(self, mailu_accounts: list[tuple[str, str]], primary_id: str, counters: MailSyncCounters) -> int:
|
||||
deleted = 0
|
||||
for account_id, _account_email in mailu_accounts:
|
||||
if account_id == primary_id:
|
||||
@ -407,11 +284,7 @@ class NextcloudService:
|
||||
if email.lower().endswith(f"@{settings.mailu_domain.lower()}")
|
||||
]
|
||||
|
||||
def _summarize_mail_accounts(
|
||||
self,
|
||||
accounts: list[tuple[str, str]],
|
||||
mailu_email: str,
|
||||
) -> tuple[int, str, list[str]]:
|
||||
def _summarize_mail_accounts(self, accounts: list[tuple[str, str]], mailu_email: str) -> tuple[int, str, list[str]]:
|
||||
mailu_accounts = self._mailu_accounts(accounts)
|
||||
account_count = len(mailu_accounts)
|
||||
primary_email = ""
|
||||
@ -425,11 +298,7 @@ class NextcloudService:
|
||||
primary_email = account_email
|
||||
return account_count, primary_email, editor_mode_ids
|
||||
|
||||
def _mail_sync_context(
|
||||
self,
|
||||
user: dict[str, Any],
|
||||
counters: MailSyncCounters,
|
||||
) -> tuple[str, str, str, str, dict[str, Any]] | None:
|
||||
def _mail_sync_context(self, user: dict[str, Any], counters: MailSyncCounters) -> tuple[str, str, str, str, dict[str, Any]] | None:
|
||||
normalized = self._normalize_user(user)
|
||||
if not normalized:
|
||||
counters.skipped += 1
|
||||
@ -448,14 +317,7 @@ class NextcloudService:
|
||||
pass
|
||||
return username, user_id, mailu_email, app_pw, full_user
|
||||
|
||||
def _sync_mail_accounts(
|
||||
self,
|
||||
username: str,
|
||||
mailu_email: str,
|
||||
app_pw: str,
|
||||
accounts: list[tuple[str, str]],
|
||||
counters: MailSyncCounters,
|
||||
) -> bool:
|
||||
def _sync_mail_accounts(self, username: str, mailu_email: str, app_pw: str, accounts: list[tuple[str, str]], counters: MailSyncCounters) -> bool:
|
||||
mailu_accounts = self._mailu_accounts(accounts)
|
||||
if mailu_accounts:
|
||||
primary_id, _primary_email = self._select_primary_account(mailu_accounts, mailu_email)
|
||||
@ -473,12 +335,7 @@ class NextcloudService:
|
||||
counters.created += 1
|
||||
return True
|
||||
|
||||
def _apply_mail_metadata(
|
||||
self,
|
||||
user_id: str,
|
||||
mailu_email: str,
|
||||
accounts: list[tuple[str, str]],
|
||||
) -> None:
|
||||
def _apply_mail_metadata(self, user_id: str, mailu_email: str, accounts: list[tuple[str, str]]) -> None:
|
||||
account_count, primary_email, editor_mode_ids = self._summarize_mail_accounts(accounts, mailu_email)
|
||||
self._set_editor_mode_richtext(editor_mode_ids)
|
||||
if user_id:
|
||||
@ -491,7 +348,7 @@ class NextcloudService:
|
||||
username, user_id, mailu_email, app_pw, full_user = context
|
||||
|
||||
try:
|
||||
display_name = self._display_name(full_user)
|
||||
display_name = _display_name(full_user)
|
||||
self._ensure_nextcloud_user(username, mailu_email, display_name)
|
||||
except Exception as exc:
|
||||
counters.record_failure(f"nextcloud user ensure failed: {exc}")
|
||||
@ -558,13 +415,6 @@ class NextcloudService:
|
||||
|
||||
return {"status": counters.status(), "summary": summary_payload, "detail": summary.detail}
|
||||
|
||||
def _run_shell(self, script: str, check: bool = True) -> None:
|
||||
self._executor.exec(
|
||||
script,
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=check,
|
||||
)
|
||||
|
||||
def _external_api(self, method: str, path: str, data: dict[str, Any] | None = None) -> dict[str, Any]:
|
||||
if not settings.nextcloud_url:
|
||||
raise RuntimeError("nextcloud url not configured")
|
||||
@ -587,113 +437,7 @@ class NextcloudService:
|
||||
return {}
|
||||
|
||||
def run_maintenance(self) -> dict[str, Any]:
|
||||
if not settings.nextcloud_namespace:
|
||||
raise RuntimeError("nextcloud maintenance not configured")
|
||||
|
||||
try:
|
||||
self._run_shell(
|
||||
"""
|
||||
set -euo pipefail
|
||||
if [ ! -d /var/www/html/lib ] && [ -d /usr/src/nextcloud/lib ]; then
|
||||
if command -v rsync >/dev/null 2>&1; then
|
||||
rsync -a --delete --exclude config --exclude data /usr/src/nextcloud/ /var/www/html/
|
||||
else
|
||||
cp -a /usr/src/nextcloud/. /var/www/html/
|
||||
fi
|
||||
fi
|
||||
mkdir -p /var/www/html/data
|
||||
chown 33:33 /var/www/html || true
|
||||
chmod 775 /var/www/html || true
|
||||
chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true
|
||||
""",
|
||||
check=False,
|
||||
)
|
||||
|
||||
self._occ(["config:app:set", "theming", "name", "--value", "Atlas Cloud"])
|
||||
self._occ(["config:app:set", "theming", "slogan", "--value", "Unified access to Atlas services"])
|
||||
theming_url = settings.nextcloud_url or "https://cloud.bstein.dev"
|
||||
self._occ(["config:app:set", "theming", "url", "--value", theming_url])
|
||||
self._occ(["config:app:set", "theming", "color", "--value", "#0f172a"])
|
||||
self._occ(["config:app:set", "theming", "disable-user-theming", "--value", "yes"])
|
||||
|
||||
self._executor.exec(
|
||||
["runuser", "-u", "www-data", "--", "php", "/var/www/html/occ", "app:install", "customcss"],
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=False,
|
||||
)
|
||||
self._executor.exec(
|
||||
["runuser", "-u", "www-data", "--", "php", "/var/www/html/occ", "app:enable", "customcss"],
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=False,
|
||||
)
|
||||
|
||||
mail_css = (
|
||||
".mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table {\n"
|
||||
" font-family: \"Inter\", \"Source Sans 3\", \"Helvetica Neue\", Arial, sans-serif;\n"
|
||||
" font-size: 14px;\n"
|
||||
" line-height: 1.6;\n"
|
||||
" color: var(--color-main-text);\n"
|
||||
"}\n"
|
||||
".mail-message-body pre {\n"
|
||||
" background: rgba(15, 23, 42, 0.06);\n"
|
||||
" padding: 12px;\n"
|
||||
" border-radius: 8px;\n"
|
||||
"}\n"
|
||||
".mail-message-body blockquote {\n"
|
||||
" border-left: 3px solid var(--color-border);\n"
|
||||
" padding-left: 12px;\n"
|
||||
" margin: 8px 0;\n"
|
||||
" color: var(--color-text-lighter);\n"
|
||||
"}\n"
|
||||
".mail-message-body img {\n"
|
||||
" max-width: 100%;\n"
|
||||
" border-radius: 6px;\n"
|
||||
"}\n"
|
||||
)
|
||||
self._occ(["config:app:set", "customcss", "css", "--value", mail_css])
|
||||
self._occ(["config:app:set", "files", "default_quota", "--value", "250 GB"])
|
||||
|
||||
payload = self._external_api("GET", "?format=json")
|
||||
links = payload.get("ocs", {}).get("data", []) if isinstance(payload, dict) else []
|
||||
for link in links:
|
||||
link_id = link.get("id") if isinstance(link, dict) else None
|
||||
if link_id is not None:
|
||||
self._external_api("DELETE", f"/sites/{link_id}?format=json")
|
||||
|
||||
sites = [
|
||||
("Vaultwarden", "https://vault.bstein.dev"),
|
||||
("Jellyfin", "https://stream.bstein.dev"),
|
||||
("Gitea", "https://scm.bstein.dev"),
|
||||
("Jenkins", "https://ci.bstein.dev"),
|
||||
("Harbor", "https://registry.bstein.dev"),
|
||||
("Vault", "https://secret.bstein.dev"),
|
||||
("Jitsi", "https://meet.bstein.dev"),
|
||||
("Grafana", "https://metrics.bstein.dev"),
|
||||
("Chat LLM", "https://chat.ai.bstein.dev"),
|
||||
("Vision", "https://draw.ai.bstein.dev"),
|
||||
("STT/TTS", "https://talk.ai.bstein.dev"),
|
||||
]
|
||||
for name, url in sites:
|
||||
self._external_api(
|
||||
"POST",
|
||||
"/sites?format=json",
|
||||
data={
|
||||
"name": name,
|
||||
"url": url,
|
||||
"lang": "",
|
||||
"type": "link",
|
||||
"device": "",
|
||||
"icon": "",
|
||||
"groups[]": "",
|
||||
"redirect": "1",
|
||||
},
|
||||
)
|
||||
except (ExecError, PodSelectionError, TimeoutError) as exc:
|
||||
return {"status": "error", "detail": str(exc)}
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"status": "error", "detail": str(exc)}
|
||||
|
||||
return {"status": "ok", "detail": "maintenance complete"}
|
||||
return run_nextcloud_maintenance(self)
|
||||
|
||||
|
||||
nextcloud = NextcloudService()
|
||||
|
||||
106
ariadne/services/nextcloud_mail_models.py
Normal file
106
ariadne/services/nextcloud_mail_models.py
Normal file
@ -0,0 +1,106 @@
|
||||
"""Mail synchronization helpers for Nextcloud account management."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from ..settings import settings
|
||||
|
||||
|
||||
def _extract_attr(attrs: Any, key: str) -> str:
|
||||
if not isinstance(attrs, dict):
|
||||
return ""
|
||||
raw = attrs.get(key)
|
||||
if isinstance(raw, list):
|
||||
for item in raw:
|
||||
if isinstance(item, str) and item.strip():
|
||||
return item.strip()
|
||||
return ""
|
||||
if isinstance(raw, str) and raw.strip():
|
||||
return raw.strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _resolve_mailu_email(username: str, user: dict[str, Any]) -> str:
|
||||
attrs = user.get("attributes")
|
||||
mailu_email = _extract_attr(attrs, "mailu_email")
|
||||
if mailu_email:
|
||||
return mailu_email
|
||||
email = user.get("email")
|
||||
if isinstance(email, str) and email.strip():
|
||||
email = email.strip()
|
||||
if email.lower().endswith(f"@{settings.mailu_domain.lower()}"):
|
||||
return email
|
||||
return f"{username}@{settings.mailu_domain}"
|
||||
|
||||
|
||||
def _parse_mail_export(output: str) -> list[tuple[str, str]]:
|
||||
accounts: list[tuple[str, str]] = []
|
||||
account_id = ""
|
||||
for line in output.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
match = re.match(r"^Account\s+(\d+):", line, flags=re.IGNORECASE)
|
||||
if match:
|
||||
account_id = match.group(1)
|
||||
continue
|
||||
match = re.match(r"^-\s*E-?mail:\s*(\S+)", line, flags=re.IGNORECASE)
|
||||
if match and account_id:
|
||||
accounts.append((account_id, match.group(1)))
|
||||
return accounts
|
||||
|
||||
|
||||
def display_name(user: dict[str, Any]) -> str:
|
||||
"""Return a human display name from Keycloak first/last name fields."""
|
||||
|
||||
first = user.get("firstName") if isinstance(user.get("firstName"), str) else ""
|
||||
last = user.get("lastName") if isinstance(user.get("lastName"), str) else ""
|
||||
first = first.strip()
|
||||
last = last.strip()
|
||||
if first and last:
|
||||
return f"{first} {last}"
|
||||
return last or first
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class NextcloudMailSyncSummary:
|
||||
processed: int
|
||||
created: int
|
||||
updated: int
|
||||
deleted: int
|
||||
skipped: int
|
||||
failures: int
|
||||
detail: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class MailSyncCounters:
|
||||
processed: int = 0
|
||||
created: int = 0
|
||||
updated: int = 0
|
||||
deleted: int = 0
|
||||
skipped: int = 0
|
||||
failures: int = 0
|
||||
last_error: str = ""
|
||||
|
||||
def summary(self) -> NextcloudMailSyncSummary:
|
||||
return NextcloudMailSyncSummary(
|
||||
processed=self.processed,
|
||||
created=self.created,
|
||||
updated=self.updated,
|
||||
deleted=self.deleted,
|
||||
skipped=self.skipped,
|
||||
failures=self.failures,
|
||||
detail=self.last_error,
|
||||
)
|
||||
|
||||
def status(self) -> str:
|
||||
return "ok" if self.failures == 0 else "error"
|
||||
|
||||
def record_failure(self, detail: str) -> None:
|
||||
self.failures += 1
|
||||
if detail and not self.last_error:
|
||||
self.last_error = detail
|
||||
130
ariadne/services/nextcloud_maintenance.py
Normal file
130
ariadne/services/nextcloud_maintenance.py
Normal file
@ -0,0 +1,130 @@
|
||||
"""Nextcloud maintenance task implementation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from ..k8s.exec import ExecError
|
||||
from ..k8s.pods import PodSelectionError
|
||||
from ..settings import settings
|
||||
|
||||
|
||||
def _run_shell(service: Any, script: str, check: bool = True) -> None:
|
||||
service._executor.exec(
|
||||
script,
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=check,
|
||||
)
|
||||
|
||||
|
||||
def run_maintenance(service: Any) -> dict[str, Any]:
|
||||
"""Run theming, app-link, quota, and filesystem maintenance for Nextcloud."""
|
||||
|
||||
if not settings.nextcloud_namespace:
|
||||
raise RuntimeError("nextcloud maintenance not configured")
|
||||
|
||||
try:
|
||||
_run_shell(
|
||||
service,
|
||||
"""
|
||||
set -euo pipefail
|
||||
if [ ! -d /var/www/html/lib ] && [ -d /usr/src/nextcloud/lib ]; then
|
||||
if command -v rsync >/dev/null 2>&1; then
|
||||
rsync -a --delete --exclude config --exclude data /usr/src/nextcloud/ /var/www/html/
|
||||
else
|
||||
cp -a /usr/src/nextcloud/. /var/www/html/
|
||||
fi
|
||||
fi
|
||||
mkdir -p /var/www/html/data
|
||||
chown 33:33 /var/www/html || true
|
||||
chmod 775 /var/www/html || true
|
||||
chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true
|
||||
""",
|
||||
check=False,
|
||||
)
|
||||
|
||||
service._occ(["config:app:set", "theming", "name", "--value", "Atlas Cloud"])
|
||||
service._occ(["config:app:set", "theming", "slogan", "--value", "Unified access to Atlas services"])
|
||||
theming_url = settings.nextcloud_url or "https://cloud.bstein.dev"
|
||||
service._occ(["config:app:set", "theming", "url", "--value", theming_url])
|
||||
service._occ(["config:app:set", "theming", "color", "--value", "#0f172a"])
|
||||
service._occ(["config:app:set", "theming", "disable-user-theming", "--value", "yes"])
|
||||
|
||||
service._executor.exec(
|
||||
["runuser", "-u", "www-data", "--", "php", "/var/www/html/occ", "app:install", "customcss"],
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=False,
|
||||
)
|
||||
service._executor.exec(
|
||||
["runuser", "-u", "www-data", "--", "php", "/var/www/html/occ", "app:enable", "customcss"],
|
||||
timeout_sec=settings.nextcloud_exec_timeout_sec,
|
||||
check=False,
|
||||
)
|
||||
|
||||
mail_css = (
|
||||
".mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table {\n"
|
||||
" font-family: \"Inter\", \"Source Sans 3\", \"Helvetica Neue\", Arial, sans-serif;\n"
|
||||
" font-size: 14px;\n"
|
||||
" line-height: 1.6;\n"
|
||||
" color: var(--color-main-text);\n"
|
||||
"}\n"
|
||||
".mail-message-body pre {\n"
|
||||
" background: rgba(15, 23, 42, 0.06);\n"
|
||||
" padding: 12px;\n"
|
||||
" border-radius: 8px;\n"
|
||||
"}\n"
|
||||
".mail-message-body blockquote {\n"
|
||||
" border-left: 3px solid var(--color-border);\n"
|
||||
" padding-left: 12px;\n"
|
||||
" margin: 8px 0;\n"
|
||||
" color: var(--color-text-lighter);\n"
|
||||
"}\n"
|
||||
".mail-message-body img {\n"
|
||||
" max-width: 100%;\n"
|
||||
" border-radius: 6px;\n"
|
||||
"}\n"
|
||||
)
|
||||
service._occ(["config:app:set", "customcss", "css", "--value", mail_css])
|
||||
service._occ(["config:app:set", "files", "default_quota", "--value", "250 GB"])
|
||||
|
||||
payload = service._external_api("GET", "?format=json")
|
||||
links = payload.get("ocs", {}).get("data", []) if isinstance(payload, dict) else []
|
||||
for link in links:
|
||||
link_id = link.get("id") if isinstance(link, dict) else None
|
||||
if link_id is not None:
|
||||
service._external_api("DELETE", f"/sites/{link_id}?format=json")
|
||||
|
||||
sites = [
|
||||
("Vaultwarden", "https://vault.bstein.dev"),
|
||||
("Jellyfin", "https://stream.bstein.dev"),
|
||||
("Gitea", "https://scm.bstein.dev"),
|
||||
("Jenkins", "https://ci.bstein.dev"),
|
||||
("Harbor", "https://registry.bstein.dev"),
|
||||
("Vault", "https://secret.bstein.dev"),
|
||||
("Jitsi", "https://meet.bstein.dev"),
|
||||
("Grafana", "https://metrics.bstein.dev"),
|
||||
("Chat LLM", "https://chat.ai.bstein.dev"),
|
||||
("Vision", "https://draw.ai.bstein.dev"),
|
||||
("STT/TTS", "https://talk.ai.bstein.dev"),
|
||||
]
|
||||
for name, url in sites:
|
||||
service._external_api(
|
||||
"POST",
|
||||
"/sites?format=json",
|
||||
data={
|
||||
"name": name,
|
||||
"url": url,
|
||||
"lang": "",
|
||||
"type": "link",
|
||||
"device": "",
|
||||
"icon": "",
|
||||
"groups[]": "",
|
||||
"redirect": "1",
|
||||
},
|
||||
)
|
||||
except (ExecError, PodSelectionError, TimeoutError) as exc:
|
||||
return {"status": "error", "detail": str(exc)}
|
||||
except Exception as exc: # noqa: BLE001
|
||||
return {"status": "error", "detail": str(exc)}
|
||||
|
||||
return {"status": "ok", "detail": "maintenance complete"}
|
||||
@ -24,6 +24,8 @@ HTTP_NOT_FOUND = 404
|
||||
|
||||
|
||||
def parse_size(value: str) -> int:
|
||||
"""Convert OpenSearch CAT index size text into bytes."""
|
||||
|
||||
if not value:
|
||||
return 0
|
||||
text = value.strip().lower()
|
||||
@ -65,6 +67,8 @@ def _delete_index(client: httpx.Client, index: str) -> None:
|
||||
|
||||
|
||||
def prune_indices() -> OpensearchPruneSummary:
|
||||
"""Delete old OpenSearch indices until usage is under the configured limit."""
|
||||
|
||||
patterns = [p.strip() for p in settings.opensearch_index_patterns.split(",") if p.strip()]
|
||||
if not patterns:
|
||||
return OpensearchPruneSummary(0, 0, 0, detail="no patterns configured")
|
||||
|
||||
@ -28,6 +28,8 @@ def _delete_pod(namespace: str, name: str) -> None:
|
||||
|
||||
|
||||
def clean_finished_pods() -> PodCleanerSummary:
|
||||
"""Delete succeeded and failed pods across namespaces."""
|
||||
|
||||
deleted = 0
|
||||
skipped = 0
|
||||
failures = 0
|
||||
|
||||
@ -8,6 +8,9 @@ import httpx
|
||||
|
||||
from ..settings import settings
|
||||
from ..utils.logging import get_logger
|
||||
from .vault_policies import DEV_KV_POLICY as _DEV_KV_POLICY
|
||||
from .vault_policies import K8S_ROLES as _K8S_ROLES
|
||||
from .vault_policies import VAULT_ADMIN_POLICY as _VAULT_ADMIN_POLICY
|
||||
|
||||
|
||||
logger = get_logger(__name__)
|
||||
@ -45,264 +48,9 @@ def _build_policy(read_paths: str, write_paths: str) -> str:
|
||||
)
|
||||
return "\n".join(policy_parts).strip() + "\n"
|
||||
|
||||
|
||||
_K8S_ROLES: list[dict[str, str]] = [
|
||||
{
|
||||
"role": "outline",
|
||||
"namespace": "outline",
|
||||
"service_accounts": "outline-vault",
|
||||
"read_paths": "outline/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "planka",
|
||||
"namespace": "planka",
|
||||
"service_accounts": "planka-vault",
|
||||
"read_paths": "planka/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "bstein-dev-home",
|
||||
"namespace": "bstein-dev-home",
|
||||
"service_accounts": "bstein-dev-home,bstein-dev-home-vault-sync",
|
||||
"read_paths": "portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay "
|
||||
"mailu/mailu-initial-account-secret shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "gitea",
|
||||
"namespace": "gitea",
|
||||
"service_accounts": "gitea-vault",
|
||||
"read_paths": "gitea/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "vaultwarden",
|
||||
"namespace": "vaultwarden",
|
||||
"service_accounts": "vaultwarden-vault",
|
||||
"read_paths": "vaultwarden/* mailu/mailu-initial-account-secret",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "sso",
|
||||
"namespace": "sso",
|
||||
"service_accounts": "sso-vault,sso-vault-sync,mas-secrets-ensure",
|
||||
"read_paths": "sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin "
|
||||
"shared/portal-e2e-client shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "mailu-mailserver",
|
||||
"namespace": "mailu-mailserver",
|
||||
"service_accounts": "mailu-vault-sync",
|
||||
"read_paths": "mailu/* shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "harbor",
|
||||
"namespace": "harbor",
|
||||
"service_accounts": "harbor-vault-sync",
|
||||
"read_paths": "harbor/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "nextcloud",
|
||||
"namespace": "nextcloud",
|
||||
"service_accounts": "nextcloud-vault",
|
||||
"read_paths": "nextcloud/* shared/keycloak-admin shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "comms",
|
||||
"namespace": "comms",
|
||||
"service_accounts": "comms-vault,atlasbot",
|
||||
"read_paths": "comms/* shared/chat-ai-keys-runtime shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "jenkins",
|
||||
"namespace": "jenkins",
|
||||
"service_accounts": "jenkins",
|
||||
"read_paths": "jenkins/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "monitoring",
|
||||
"namespace": "monitoring",
|
||||
"service_accounts": "monitoring-vault-sync",
|
||||
"read_paths": "monitoring/* shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "logging",
|
||||
"namespace": "logging",
|
||||
"service_accounts": "logging-vault-sync",
|
||||
"read_paths": "logging/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "pegasus",
|
||||
"namespace": "jellyfin",
|
||||
"service_accounts": "pegasus-vault-sync",
|
||||
"read_paths": "pegasus/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "crypto",
|
||||
"namespace": "crypto",
|
||||
"service_accounts": "crypto-vault-sync",
|
||||
"read_paths": "crypto/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "health",
|
||||
"namespace": "health",
|
||||
"service_accounts": "health-vault-sync",
|
||||
"read_paths": "health/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "maintenance",
|
||||
"namespace": "maintenance",
|
||||
"service_accounts": "ariadne,maintenance-vault-sync",
|
||||
"read_paths": "maintenance/ariadne-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret "
|
||||
"mailu/mailu-initial-account-secret comms/synapse-admin shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "finance",
|
||||
"namespace": "finance",
|
||||
"service_accounts": "finance-vault",
|
||||
"read_paths": "finance/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "finance-secrets",
|
||||
"namespace": "finance",
|
||||
"service_accounts": "finance-secrets-ensure",
|
||||
"read_paths": "",
|
||||
"write_paths": "finance/*",
|
||||
},
|
||||
{
|
||||
"role": "longhorn",
|
||||
"namespace": "longhorn-system",
|
||||
"service_accounts": "longhorn-vault,longhorn-vault-sync",
|
||||
"read_paths": "longhorn/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "postgres",
|
||||
"namespace": "postgres",
|
||||
"service_accounts": "postgres-vault",
|
||||
"read_paths": "postgres/postgres-db",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "vault",
|
||||
"namespace": "vault",
|
||||
"service_accounts": "vault",
|
||||
"read_paths": "vault/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "sso-secrets",
|
||||
"namespace": "sso",
|
||||
"service_accounts": "mas-secrets-ensure",
|
||||
"read_paths": "shared/keycloak-admin",
|
||||
"write_paths": "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc "
|
||||
"logging/oauth2-proxy-logs-oidc finance/actual-oidc",
|
||||
},
|
||||
{
|
||||
"role": "crypto-secrets",
|
||||
"namespace": "crypto",
|
||||
"service_accounts": "crypto-secrets-ensure",
|
||||
"read_paths": "",
|
||||
"write_paths": "crypto/wallet-monero-temp-rpc-auth",
|
||||
},
|
||||
{
|
||||
"role": "comms-secrets",
|
||||
"namespace": "comms",
|
||||
"service_accounts": "comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job",
|
||||
"read_paths": "",
|
||||
"write_paths": "comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon "
|
||||
"comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin comms/synapse-registration "
|
||||
"comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
_VAULT_ADMIN_POLICY = """
|
||||
path "sys/auth" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/auth/*" {
|
||||
capabilities = ["create", "update", "delete", "sudo", "read"]
|
||||
}
|
||||
path "auth/kubernetes/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "auth/oidc/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "sys/policies/acl" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "sys/policies/acl/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "sys/internal/ui/mounts" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/mounts" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/mounts/auth/*" {
|
||||
capabilities = ["read", "update", "sudo"]
|
||||
}
|
||||
path "kv/data/atlas/vault/*" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "kv/metadata/atlas/vault/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/data/*" {
|
||||
capabilities = ["create", "update", "read", "delete", "patch"]
|
||||
}
|
||||
path "kv/metadata" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/*" {
|
||||
capabilities = ["read", "list", "delete"]
|
||||
}
|
||||
path "kv/data/atlas/shared/*" {
|
||||
capabilities = ["create", "update", "read", "patch"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
""".strip()
|
||||
|
||||
|
||||
_DEV_KV_POLICY = """
|
||||
path "kv/metadata" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/data/atlas/shared/*" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
""".strip()
|
||||
|
||||
|
||||
class VaultClient:
|
||||
"""Minimal HTTP client for Vault API requests."""
|
||||
|
||||
def __init__(self, base_url: str, token: str | None = None) -> None:
|
||||
self._base_url = base_url.rstrip("/")
|
||||
self._token = token
|
||||
@ -321,6 +69,8 @@ class VaultClient:
|
||||
|
||||
|
||||
class VaultService:
|
||||
"""Ensure Vault is initialized, unsealed, and configured for Atlas access."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._token: str | None = None
|
||||
|
||||
|
||||
258
ariadne/services/vault_policies.py
Normal file
258
ariadne/services/vault_policies.py
Normal file
@ -0,0 +1,258 @@
|
||||
"""Vault role and policy definitions used by Ariadne Vault reconciliation."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
K8S_ROLES: list[dict[str, str]] = [
|
||||
{
|
||||
"role": "outline",
|
||||
"namespace": "outline",
|
||||
"service_accounts": "outline-vault",
|
||||
"read_paths": "outline/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "planka",
|
||||
"namespace": "planka",
|
||||
"service_accounts": "planka-vault",
|
||||
"read_paths": "planka/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "bstein-dev-home",
|
||||
"namespace": "bstein-dev-home",
|
||||
"service_accounts": "bstein-dev-home,bstein-dev-home-vault-sync",
|
||||
"read_paths": "portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay "
|
||||
"mailu/mailu-initial-account-secret shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "gitea",
|
||||
"namespace": "gitea",
|
||||
"service_accounts": "gitea-vault",
|
||||
"read_paths": "gitea/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "vaultwarden",
|
||||
"namespace": "vaultwarden",
|
||||
"service_accounts": "vaultwarden-vault",
|
||||
"read_paths": "vaultwarden/* mailu/mailu-initial-account-secret",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "sso",
|
||||
"namespace": "sso",
|
||||
"service_accounts": "sso-vault,sso-vault-sync,mas-secrets-ensure",
|
||||
"read_paths": "sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin "
|
||||
"shared/portal-e2e-client shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "mailu-mailserver",
|
||||
"namespace": "mailu-mailserver",
|
||||
"service_accounts": "mailu-vault-sync",
|
||||
"read_paths": "mailu/* shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "harbor",
|
||||
"namespace": "harbor",
|
||||
"service_accounts": "harbor-vault-sync",
|
||||
"read_paths": "harbor/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "nextcloud",
|
||||
"namespace": "nextcloud",
|
||||
"service_accounts": "nextcloud-vault",
|
||||
"read_paths": "nextcloud/* shared/keycloak-admin shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "comms",
|
||||
"namespace": "comms",
|
||||
"service_accounts": "comms-vault,atlasbot",
|
||||
"read_paths": "comms/* shared/chat-ai-keys-runtime shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "jenkins",
|
||||
"namespace": "jenkins",
|
||||
"service_accounts": "jenkins",
|
||||
"read_paths": "jenkins/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "monitoring",
|
||||
"namespace": "monitoring",
|
||||
"service_accounts": "monitoring-vault-sync",
|
||||
"read_paths": "monitoring/* shared/postmark-relay shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "logging",
|
||||
"namespace": "logging",
|
||||
"service_accounts": "logging-vault-sync",
|
||||
"read_paths": "logging/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "pegasus",
|
||||
"namespace": "jellyfin",
|
||||
"service_accounts": "pegasus-vault-sync",
|
||||
"read_paths": "pegasus/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "crypto",
|
||||
"namespace": "crypto",
|
||||
"service_accounts": "crypto-vault-sync",
|
||||
"read_paths": "crypto/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "health",
|
||||
"namespace": "health",
|
||||
"service_accounts": "health-vault-sync",
|
||||
"read_paths": "health/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "maintenance",
|
||||
"namespace": "maintenance",
|
||||
"service_accounts": "ariadne,maintenance-vault-sync",
|
||||
"read_paths": "maintenance/ariadne-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret "
|
||||
"mailu/mailu-initial-account-secret comms/synapse-admin shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "finance",
|
||||
"namespace": "finance",
|
||||
"service_accounts": "finance-vault",
|
||||
"read_paths": "finance/* shared/postmark-relay",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "finance-secrets",
|
||||
"namespace": "finance",
|
||||
"service_accounts": "finance-secrets-ensure",
|
||||
"read_paths": "",
|
||||
"write_paths": "finance/*",
|
||||
},
|
||||
{
|
||||
"role": "longhorn",
|
||||
"namespace": "longhorn-system",
|
||||
"service_accounts": "longhorn-vault,longhorn-vault-sync",
|
||||
"read_paths": "longhorn/* shared/harbor-pull",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "postgres",
|
||||
"namespace": "postgres",
|
||||
"service_accounts": "postgres-vault",
|
||||
"read_paths": "postgres/postgres-db",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "vault",
|
||||
"namespace": "vault",
|
||||
"service_accounts": "vault",
|
||||
"read_paths": "vault/*",
|
||||
"write_paths": "",
|
||||
},
|
||||
{
|
||||
"role": "sso-secrets",
|
||||
"namespace": "sso",
|
||||
"service_accounts": "mas-secrets-ensure",
|
||||
"read_paths": "shared/keycloak-admin",
|
||||
"write_paths": "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc "
|
||||
"logging/oauth2-proxy-logs-oidc finance/actual-oidc",
|
||||
},
|
||||
{
|
||||
"role": "crypto-secrets",
|
||||
"namespace": "crypto",
|
||||
"service_accounts": "crypto-secrets-ensure",
|
||||
"read_paths": "",
|
||||
"write_paths": "crypto/wallet-monero-temp-rpc-auth",
|
||||
},
|
||||
{
|
||||
"role": "comms-secrets",
|
||||
"namespace": "comms",
|
||||
"service_accounts": "comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job",
|
||||
"read_paths": "",
|
||||
"write_paths": "comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon "
|
||||
"comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin comms/synapse-registration "
|
||||
"comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
VAULT_ADMIN_POLICY = """
|
||||
path "sys/auth" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/auth/*" {
|
||||
capabilities = ["create", "update", "delete", "sudo", "read"]
|
||||
}
|
||||
path "auth/kubernetes/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "auth/oidc/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "sys/policies/acl" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "sys/policies/acl/*" {
|
||||
capabilities = ["create", "update", "read"]
|
||||
}
|
||||
path "sys/internal/ui/mounts" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/mounts" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "sys/mounts/auth/*" {
|
||||
capabilities = ["read", "update", "sudo"]
|
||||
}
|
||||
path "kv/data/atlas/vault/*" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
path "kv/metadata/atlas/vault/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/data/*" {
|
||||
capabilities = ["create", "update", "read", "delete", "patch"]
|
||||
}
|
||||
path "kv/metadata" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/*" {
|
||||
capabilities = ["read", "list", "delete"]
|
||||
}
|
||||
path "kv/data/atlas/shared/*" {
|
||||
capabilities = ["create", "update", "read", "patch"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
""".strip()
|
||||
|
||||
|
||||
DEV_KV_POLICY = """
|
||||
path "kv/metadata" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/metadata/atlas/shared/*" {
|
||||
capabilities = ["list"]
|
||||
}
|
||||
path "kv/data/atlas/shared/*" {
|
||||
capabilities = ["read"]
|
||||
}
|
||||
""".strip()
|
||||
@ -33,6 +33,8 @@ class VaultwardenLookup:
|
||||
|
||||
|
||||
class VaultwardenService:
|
||||
"""Invite eligible users to Vaultwarden through the admin interface."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._admin_lock = threading.Lock()
|
||||
self._admin_client: httpx.Client | None = None
|
||||
|
||||
@ -242,17 +242,12 @@ def _handle_existing_invite(state: VaultwardenInviteState) -> bool:
|
||||
state.counters.skipped += 1
|
||||
return True
|
||||
if not _should_refresh_invite(state.synced_ts):
|
||||
if not state.synced_at:
|
||||
_set_sync_status(state.username, state.status)
|
||||
state.counters.skipped += 1
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _sync_user(
|
||||
user: dict[str, Any],
|
||||
counters: VaultwardenSyncCounters,
|
||||
) -> tuple[str | None, bool]:
|
||||
def _sync_user(user: dict[str, Any], counters: VaultwardenSyncCounters) -> tuple[str | None, bool]:
|
||||
status: str | None = None
|
||||
ok = False
|
||||
normalized = _normalize_user(user)
|
||||
@ -297,6 +292,8 @@ def _sync_user(
|
||||
|
||||
|
||||
def run_vaultwarden_sync() -> VaultwardenSyncSummary:
|
||||
"""Process pending Vaultwarden invite failures until the queue is healthy."""
|
||||
|
||||
consecutive_failures = 0
|
||||
counters = VaultwardenSyncCounters()
|
||||
|
||||
|
||||
@ -3,7 +3,6 @@ from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
import textwrap
|
||||
|
||||
from ..k8s.exec import ExecError, PodExecutor
|
||||
from ..k8s.pods import PodSelectionError
|
||||
@ -12,6 +11,8 @@ from ..utils.logging import get_logger
|
||||
from ..utils.passwords import random_password
|
||||
from .keycloak_admin import keycloak_admin
|
||||
from .mailu import mailu
|
||||
from .wger_scripts import WGER_PASSWORD_CHECK_SCRIPT as _WGER_PASSWORD_CHECK_SCRIPT
|
||||
from .wger_scripts import WGER_SYNC_SCRIPT as _WGER_SYNC_SCRIPT
|
||||
|
||||
|
||||
EXIT_PASSWORD_MATCH = 0
|
||||
@ -23,179 +24,6 @@ WGER_PASSWORD_ROTATED_ATTR = "wger_password_rotated_at"
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
_WGER_SYNC_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else ""
|
||||
|
||||
|
||||
def _setup_django() -> None:
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
|
||||
django.setup()
|
||||
|
||||
|
||||
def _set_default_gym(user) -> None:
|
||||
try:
|
||||
from wger.gym.models import GymConfig
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
config = GymConfig.objects.first()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
if not config or not getattr(config, "default_gym", None):
|
||||
return
|
||||
|
||||
profile = getattr(user, "userprofile", None)
|
||||
if not profile or getattr(profile, "gym", None):
|
||||
return
|
||||
|
||||
profile.gym = config.default_gym
|
||||
profile.save()
|
||||
|
||||
|
||||
def _ensure_profile(user) -> None:
|
||||
profile = getattr(user, "userprofile", None)
|
||||
if not profile:
|
||||
return
|
||||
if hasattr(profile, "email_verified") and not profile.email_verified:
|
||||
profile.email_verified = True
|
||||
if hasattr(profile, "is_temporary") and profile.is_temporary:
|
||||
profile.is_temporary = False
|
||||
profile.save()
|
||||
|
||||
|
||||
def _ensure_admin(username: str, password: str, email: str) -> None:
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
if not username or not password:
|
||||
raise RuntimeError("admin username/password missing")
|
||||
|
||||
user, created = User.objects.get_or_create(username=username)
|
||||
if created:
|
||||
user.is_active = True
|
||||
if not user.is_staff:
|
||||
user.is_staff = True
|
||||
if email:
|
||||
user.email = email
|
||||
user.set_password(password)
|
||||
user.save()
|
||||
|
||||
_ensure_profile(user)
|
||||
_set_default_gym(user)
|
||||
print(f"ensured admin user {username}")
|
||||
|
||||
|
||||
def _ensure_user(username: str, password: str, email: str) -> None:
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
if not username or not password:
|
||||
raise RuntimeError("username/password missing")
|
||||
|
||||
user, created = User.objects.get_or_create(username=username)
|
||||
if created:
|
||||
user.is_active = True
|
||||
if email and user.email != email:
|
||||
user.email = email
|
||||
user.set_password(password)
|
||||
user.save()
|
||||
|
||||
_ensure_profile(user)
|
||||
_set_default_gym(user)
|
||||
action = "created" if created else "updated"
|
||||
print(f"{action} user {username}")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
admin_user = _env("WGER_ADMIN_USERNAME")
|
||||
admin_password = _env("WGER_ADMIN_PASSWORD")
|
||||
admin_email = _env("WGER_ADMIN_EMAIL")
|
||||
|
||||
username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
|
||||
password = _env("WGER_PASSWORD")
|
||||
email = _env("WGER_EMAIL")
|
||||
|
||||
if not any([admin_user and admin_password, username and password]):
|
||||
print("no admin or user payload provided; exiting")
|
||||
return 0
|
||||
|
||||
_setup_django()
|
||||
|
||||
if admin_user and admin_password:
|
||||
_ensure_admin(admin_user, admin_password, admin_email)
|
||||
|
||||
if username and password:
|
||||
_ensure_user(username, password, email)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
"""
|
||||
).strip()
|
||||
|
||||
_WGER_PASSWORD_CHECK_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else ""
|
||||
|
||||
|
||||
def _setup_django() -> None:
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
|
||||
django.setup()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
username = _env("WGER_USERNAME")
|
||||
password = _env("WGER_PASSWORD")
|
||||
|
||||
if not username or not password:
|
||||
print("missing username or password")
|
||||
return 2
|
||||
|
||||
_setup_django()
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
user = User.objects.filter(username=username).first()
|
||||
if not user:
|
||||
print(f"user {username} missing")
|
||||
return 3
|
||||
|
||||
if user.check_password(password):
|
||||
print("password match")
|
||||
return 0
|
||||
|
||||
print("password mismatch")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
def _wger_exec_command() -> str:
|
||||
bootstrap = ". /vault/secrets/wger-env >/dev/null 2>&1 || true"
|
||||
@ -446,6 +274,8 @@ def _rotation_check_input(username: str) -> tuple[WgerSyncInput | UserSyncOutcom
|
||||
|
||||
|
||||
class WgerService:
|
||||
"""Synchronize Keycloak users and password rotations into Wger."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._executor = PodExecutor(
|
||||
settings.wger_namespace,
|
||||
|
||||
180
ariadne/services/wger_scripts.py
Normal file
180
ariadne/services/wger_scripts.py
Normal file
@ -0,0 +1,180 @@
|
||||
"""Embedded scripts executed inside the wger application pod."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import textwrap
|
||||
|
||||
WGER_SYNC_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else ""
|
||||
|
||||
|
||||
def _setup_django() -> None:
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
|
||||
django.setup()
|
||||
|
||||
|
||||
def _set_default_gym(user) -> None:
|
||||
try:
|
||||
from wger.gym.models import GymConfig
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
config = GymConfig.objects.first()
|
||||
except Exception:
|
||||
return
|
||||
|
||||
if not config or not getattr(config, "default_gym", None):
|
||||
return
|
||||
|
||||
profile = getattr(user, "userprofile", None)
|
||||
if not profile or getattr(profile, "gym", None):
|
||||
return
|
||||
|
||||
profile.gym = config.default_gym
|
||||
profile.save()
|
||||
|
||||
|
||||
def _ensure_profile(user) -> None:
|
||||
profile = getattr(user, "userprofile", None)
|
||||
if not profile:
|
||||
return
|
||||
if hasattr(profile, "email_verified") and not profile.email_verified:
|
||||
profile.email_verified = True
|
||||
if hasattr(profile, "is_temporary") and profile.is_temporary:
|
||||
profile.is_temporary = False
|
||||
profile.save()
|
||||
|
||||
|
||||
def _ensure_admin(username: str, password: str, email: str) -> None:
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
if not username or not password:
|
||||
raise RuntimeError("admin username/password missing")
|
||||
|
||||
user, created = User.objects.get_or_create(username=username)
|
||||
if created:
|
||||
user.is_active = True
|
||||
if not user.is_staff:
|
||||
user.is_staff = True
|
||||
if email:
|
||||
user.email = email
|
||||
user.set_password(password)
|
||||
user.save()
|
||||
|
||||
_ensure_profile(user)
|
||||
_set_default_gym(user)
|
||||
print(f"ensured admin user {username}")
|
||||
|
||||
|
||||
def _ensure_user(username: str, password: str, email: str) -> None:
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
if not username or not password:
|
||||
raise RuntimeError("username/password missing")
|
||||
|
||||
user, created = User.objects.get_or_create(username=username)
|
||||
if created:
|
||||
user.is_active = True
|
||||
if email and user.email != email:
|
||||
user.email = email
|
||||
user.set_password(password)
|
||||
user.save()
|
||||
|
||||
_ensure_profile(user)
|
||||
_set_default_gym(user)
|
||||
action = "created" if created else "updated"
|
||||
print(f"{action} user {username}")
|
||||
|
||||
|
||||
def main() -> int:
|
||||
admin_user = _env("WGER_ADMIN_USERNAME")
|
||||
admin_password = _env("WGER_ADMIN_PASSWORD")
|
||||
admin_email = _env("WGER_ADMIN_EMAIL")
|
||||
|
||||
username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
|
||||
password = _env("WGER_PASSWORD")
|
||||
email = _env("WGER_EMAIL")
|
||||
|
||||
if not any([admin_user and admin_password, username and password]):
|
||||
print("no admin or user payload provided; exiting")
|
||||
return 0
|
||||
|
||||
_setup_django()
|
||||
|
||||
if admin_user and admin_password:
|
||||
_ensure_admin(admin_user, admin_password, admin_email)
|
||||
|
||||
if username and password:
|
||||
_ensure_user(username, password, email)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
"""
|
||||
).strip()
|
||||
|
||||
WGER_PASSWORD_CHECK_SCRIPT = textwrap.dedent(
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import django
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else ""
|
||||
|
||||
|
||||
def _setup_django() -> None:
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
|
||||
django.setup()
|
||||
|
||||
|
||||
def main() -> int:
|
||||
username = _env("WGER_USERNAME")
|
||||
password = _env("WGER_PASSWORD")
|
||||
|
||||
if not username or not password:
|
||||
print("missing username or password")
|
||||
return 2
|
||||
|
||||
_setup_django()
|
||||
|
||||
from django.contrib.auth.models import User
|
||||
|
||||
user = User.objects.filter(username=username).first()
|
||||
if not user:
|
||||
print(f"user {username} missing")
|
||||
return 3
|
||||
|
||||
if user.check_password(password):
|
||||
print("password match")
|
||||
return 0
|
||||
|
||||
print("password mismatch")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
"""
|
||||
).strip()
|
||||
|
||||
|
||||
@ -1,33 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else default
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str = "false") -> bool:
|
||||
return _env(name, default).lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = _env(name, str(default))
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
raw = _env(name, str(default))
|
||||
try:
|
||||
return float(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
from .settings_env import _env, _env_bool, _env_float, _env_int
|
||||
from .settings_sections import (
|
||||
_cluster_state_config,
|
||||
_comms_config,
|
||||
_firefly_config,
|
||||
_image_sweeper_config,
|
||||
_jenkins_build_weather_config,
|
||||
_jenkins_workspace_cleanup_config,
|
||||
_keycloak_config,
|
||||
_mailu_config,
|
||||
_metis_config,
|
||||
_nextcloud_config,
|
||||
_opensearch_config,
|
||||
_platform_quality_probe_config,
|
||||
_portal_group_config,
|
||||
_schedule_config,
|
||||
_smtp_config,
|
||||
_vault_config,
|
||||
_vaultwarden_config,
|
||||
_wger_config,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
@ -168,6 +163,15 @@ class Settings:
|
||||
platform_quality_probe_wait_timeout_sec: float
|
||||
platform_quality_probe_pushgateway_url: str
|
||||
platform_quality_probe_http_timeout_sec: int
|
||||
jenkins_base_url: str
|
||||
jenkins_api_user: str
|
||||
jenkins_api_token: str
|
||||
jenkins_api_timeout_sec: float
|
||||
jenkins_workspace_namespace: str
|
||||
jenkins_workspace_pvc_prefix: str
|
||||
jenkins_workspace_cleanup_min_age_hours: float
|
||||
jenkins_workspace_cleanup_dry_run: bool
|
||||
jenkins_workspace_cleanup_max_deletions_per_run: int
|
||||
|
||||
vaultwarden_namespace: str
|
||||
vaultwarden_pod_label: str
|
||||
@ -234,6 +238,8 @@ class Settings:
|
||||
metis_token_sync_vault_k8s_role: str
|
||||
metis_k3s_token_sync_cron: str
|
||||
platform_quality_suite_probe_cron: str
|
||||
jenkins_build_weather_cron: str
|
||||
jenkins_workspace_cleanup_cron: str
|
||||
|
||||
opensearch_url: str
|
||||
opensearch_limit_bytes: int
|
||||
@ -242,334 +248,26 @@ class Settings:
|
||||
|
||||
metrics_path: str
|
||||
|
||||
@classmethod
|
||||
def _keycloak_config(cls) -> dict[str, Any]:
|
||||
keycloak_url = _env("KEYCLOAK_URL", "https://sso.bstein.dev").rstrip("/")
|
||||
keycloak_realm = _env("KEYCLOAK_REALM", "atlas")
|
||||
keycloak_client_id = _env("KEYCLOAK_CLIENT_ID", "bstein-dev-home")
|
||||
keycloak_issuer = _env("KEYCLOAK_ISSUER", f"{keycloak_url}/realms/{keycloak_realm}").rstrip("/")
|
||||
keycloak_jwks_url = _env("KEYCLOAK_JWKS_URL", f"{keycloak_issuer}/protocol/openid-connect/certs").rstrip("/")
|
||||
return {
|
||||
"keycloak_url": keycloak_url,
|
||||
"keycloak_realm": keycloak_realm,
|
||||
"keycloak_client_id": keycloak_client_id,
|
||||
"keycloak_issuer": keycloak_issuer,
|
||||
"keycloak_jwks_url": keycloak_jwks_url,
|
||||
"keycloak_admin_url": _env("KEYCLOAK_ADMIN_URL", keycloak_url).rstrip("/"),
|
||||
"keycloak_admin_realm": _env("KEYCLOAK_ADMIN_REALM", keycloak_realm),
|
||||
"keycloak_admin_client_id": _env("KEYCLOAK_ADMIN_CLIENT_ID", ""),
|
||||
"keycloak_admin_client_secret": _env("KEYCLOAK_ADMIN_CLIENT_SECRET", ""),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _portal_group_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"portal_admin_users": [u for u in (_env("PORTAL_ADMIN_USERS", "bstein")).split(",") if u.strip()],
|
||||
"portal_admin_groups": [g for g in (_env("PORTAL_ADMIN_GROUPS", "admin")).split(",") if g.strip()],
|
||||
"account_allowed_groups": [
|
||||
g for g in (_env("ACCOUNT_ALLOWED_GROUPS", "dev,admin")).split(",") if g.strip()
|
||||
],
|
||||
"allowed_flag_groups": [g for g in (_env("ALLOWED_FLAG_GROUPS", "demo,test")).split(",") if g.strip()],
|
||||
"default_user_groups": [g for g in (_env("DEFAULT_USER_GROUPS", "dev")).split(",") if g.strip()],
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _mailu_config(cls) -> dict[str, Any]:
|
||||
mailu_domain = _env("MAILU_DOMAIN", "bstein.dev")
|
||||
return {
|
||||
"mailu_domain": mailu_domain,
|
||||
"mailu_sync_url": _env(
|
||||
"MAILU_SYNC_URL",
|
||||
"http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events",
|
||||
).rstrip("/"),
|
||||
"mailu_event_min_interval_sec": _env_float("MAILU_EVENT_MIN_INTERVAL_SEC", 10.0),
|
||||
"mailu_sync_wait_timeout_sec": _env_float("MAILU_SYNC_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"mailu_mailbox_wait_timeout_sec": _env_float("MAILU_MAILBOX_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"mailu_db_host": _env("MAILU_DB_HOST", "postgres-service.postgres.svc.cluster.local"),
|
||||
"mailu_db_port": _env_int("MAILU_DB_PORT", 5432),
|
||||
"mailu_db_name": _env("MAILU_DB_NAME", "mailu"),
|
||||
"mailu_db_user": _env("MAILU_DB_USER", "mailu"),
|
||||
"mailu_db_password": _env("MAILU_DB_PASSWORD", ""),
|
||||
"mailu_host": _env("MAILU_HOST", f"mail.{mailu_domain}"),
|
||||
"mailu_default_quota": _env_int("MAILU_DEFAULT_QUOTA", 20000000000),
|
||||
"mailu_system_users": [u for u in _env("MAILU_SYSTEM_USERS", "").split(",") if u.strip()],
|
||||
"mailu_system_password": _env("MAILU_SYSTEM_PASSWORD", ""),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _smtp_config(cls, mailu_domain: str) -> dict[str, Any]:
|
||||
return {
|
||||
"smtp_host": _env("SMTP_HOST", ""),
|
||||
"smtp_port": _env_int("SMTP_PORT", 25),
|
||||
"smtp_username": _env("SMTP_USERNAME", ""),
|
||||
"smtp_password": _env("SMTP_PASSWORD", ""),
|
||||
"smtp_starttls": _env_bool("SMTP_STARTTLS", "false"),
|
||||
"smtp_use_tls": _env_bool("SMTP_USE_TLS", "false"),
|
||||
"smtp_from": _env("SMTP_FROM", f"postmaster@{mailu_domain}"),
|
||||
"smtp_timeout_sec": _env_float("SMTP_TIMEOUT_SEC", 10.0),
|
||||
"welcome_email_enabled": _env_bool("WELCOME_EMAIL_ENABLED", "true"),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _nextcloud_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"nextcloud_namespace": _env("NEXTCLOUD_NAMESPACE", "nextcloud"),
|
||||
"nextcloud_pod_label": _env("NEXTCLOUD_POD_LABEL", "app=nextcloud"),
|
||||
"nextcloud_container": _env("NEXTCLOUD_CONTAINER", "nextcloud"),
|
||||
"nextcloud_exec_timeout_sec": _env_float("NEXTCLOUD_EXEC_TIMEOUT_SEC", 120.0),
|
||||
"nextcloud_db_host": _env("NEXTCLOUD_DB_HOST", "postgres-service.postgres.svc.cluster.local"),
|
||||
"nextcloud_db_port": _env_int("NEXTCLOUD_DB_PORT", 5432),
|
||||
"nextcloud_db_name": _env("NEXTCLOUD_DB_NAME", "nextcloud"),
|
||||
"nextcloud_db_user": _env("NEXTCLOUD_DB_USER", "nextcloud"),
|
||||
"nextcloud_db_password": _env("NEXTCLOUD_DB_PASSWORD", ""),
|
||||
"nextcloud_url": _env("NEXTCLOUD_URL", "https://cloud.bstein.dev").rstrip("/"),
|
||||
"nextcloud_admin_user": _env("NEXTCLOUD_ADMIN_USER", ""),
|
||||
"nextcloud_admin_password": _env("NEXTCLOUD_ADMIN_PASSWORD", ""),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _wger_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"wger_namespace": _env("WGER_NAMESPACE", "health"),
|
||||
"wger_user_sync_wait_timeout_sec": _env_float("WGER_USER_SYNC_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"wger_pod_label": _env("WGER_POD_LABEL", "app=wger"),
|
||||
"wger_container": _env("WGER_CONTAINER", "wger"),
|
||||
"wger_admin_username": _env("WGER_ADMIN_USERNAME", ""),
|
||||
"wger_admin_password": _env("WGER_ADMIN_PASSWORD", ""),
|
||||
"wger_admin_email": _env("WGER_ADMIN_EMAIL", ""),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _firefly_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"firefly_namespace": _env("FIREFLY_NAMESPACE", "finance"),
|
||||
"firefly_user_sync_wait_timeout_sec": _env_float("FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC", 90.0),
|
||||
"firefly_pod_label": _env("FIREFLY_POD_LABEL", "app=firefly"),
|
||||
"firefly_container": _env("FIREFLY_CONTAINER", "firefly"),
|
||||
"firefly_cron_base_url": _env(
|
||||
"FIREFLY_CRON_BASE_URL",
|
||||
"http://firefly.finance.svc.cluster.local/api/v1/cron",
|
||||
),
|
||||
"firefly_cron_token": _env("FIREFLY_CRON_TOKEN", ""),
|
||||
"firefly_cron_timeout_sec": _env_float("FIREFLY_CRON_TIMEOUT_SEC", 30.0),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _vault_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"vault_namespace": _env("VAULT_NAMESPACE", "vault"),
|
||||
"vault_addr": _env("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200").rstrip("/"),
|
||||
"vault_token": _env("VAULT_TOKEN", ""),
|
||||
"vault_k8s_role": _env("VAULT_K8S_ROLE", "vault"),
|
||||
"vault_k8s_role_ttl": _env("VAULT_K8S_ROLE_TTL", "1h"),
|
||||
"vault_k8s_token_reviewer_jwt": _env("VAULT_K8S_TOKEN_REVIEWER_JWT", ""),
|
||||
"vault_k8s_token_reviewer_jwt_file": _env("VAULT_K8S_TOKEN_REVIEWER_JWT_FILE", ""),
|
||||
"vault_oidc_discovery_url": _env("VAULT_OIDC_DISCOVERY_URL", ""),
|
||||
"vault_oidc_client_id": _env("VAULT_OIDC_CLIENT_ID", ""),
|
||||
"vault_oidc_client_secret": _env("VAULT_OIDC_CLIENT_SECRET", ""),
|
||||
"vault_oidc_default_role": _env("VAULT_OIDC_DEFAULT_ROLE", "admin"),
|
||||
"vault_oidc_scopes": _env("VAULT_OIDC_SCOPES", "openid profile email groups"),
|
||||
"vault_oidc_user_claim": _env("VAULT_OIDC_USER_CLAIM", "preferred_username"),
|
||||
"vault_oidc_groups_claim": _env("VAULT_OIDC_GROUPS_CLAIM", "groups"),
|
||||
"vault_oidc_token_policies": _env("VAULT_OIDC_TOKEN_POLICIES", ""),
|
||||
"vault_oidc_admin_group": _env("VAULT_OIDC_ADMIN_GROUP", "admin"),
|
||||
"vault_oidc_admin_policies": _env("VAULT_OIDC_ADMIN_POLICIES", "default,vault-admin"),
|
||||
"vault_oidc_dev_group": _env("VAULT_OIDC_DEV_GROUP", "dev"),
|
||||
"vault_oidc_dev_policies": _env("VAULT_OIDC_DEV_POLICIES", "default,dev-kv"),
|
||||
"vault_oidc_user_group": _env("VAULT_OIDC_USER_GROUP", ""),
|
||||
"vault_oidc_user_policies": _env("VAULT_OIDC_USER_POLICIES", ""),
|
||||
"vault_oidc_redirect_uris": _env(
|
||||
"VAULT_OIDC_REDIRECT_URIS",
|
||||
"https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback",
|
||||
),
|
||||
"vault_oidc_bound_audiences": _env("VAULT_OIDC_BOUND_AUDIENCES", ""),
|
||||
"vault_oidc_bound_claims_type": _env("VAULT_OIDC_BOUND_CLAIMS_TYPE", "string"),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _comms_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"comms_namespace": _env("COMMS_NAMESPACE", "comms"),
|
||||
"comms_synapse_base": _env(
|
||||
"COMMS_SYNAPSE_BASE",
|
||||
"http://othrys-synapse-matrix-synapse:8008",
|
||||
).rstrip("/"),
|
||||
"comms_auth_base": _env(
|
||||
"COMMS_AUTH_BASE",
|
||||
"http://matrix-authentication-service:8080",
|
||||
).rstrip("/"),
|
||||
"comms_mas_admin_api_base": _env(
|
||||
"COMMS_MAS_ADMIN_API_BASE",
|
||||
"http://matrix-authentication-service:8081/api/admin/v1",
|
||||
).rstrip("/"),
|
||||
"comms_mas_token_url": _env(
|
||||
"COMMS_MAS_TOKEN_URL",
|
||||
"http://matrix-authentication-service:8080/oauth2/token",
|
||||
),
|
||||
"comms_mas_admin_client_id": _env("COMMS_MAS_ADMIN_CLIENT_ID", "01KDXMVQBQ5JNY6SEJPZW6Z8BM"),
|
||||
"comms_mas_admin_client_secret": _env("COMMS_MAS_ADMIN_CLIENT_SECRET", ""),
|
||||
"comms_server_name": _env("COMMS_SERVER_NAME", "live.bstein.dev"),
|
||||
"comms_room_alias": _env("COMMS_ROOM_ALIAS", "#othrys:live.bstein.dev"),
|
||||
"comms_room_name": _env("COMMS_ROOM_NAME", "Othrys"),
|
||||
"comms_pin_message": _env(
|
||||
"COMMS_PIN_MESSAGE",
|
||||
"Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'.",
|
||||
),
|
||||
"comms_seeder_user": _env("COMMS_SEEDER_USER", "othrys-seeder"),
|
||||
"comms_seeder_password": _env("COMMS_SEEDER_PASSWORD", ""),
|
||||
"comms_bot_user": _env("COMMS_BOT_USER", "atlasbot"),
|
||||
"comms_bot_password": _env("COMMS_BOT_PASSWORD", ""),
|
||||
"comms_synapse_db_host": _env(
|
||||
"COMMS_SYNAPSE_DB_HOST",
|
||||
"postgres-service.postgres.svc.cluster.local",
|
||||
),
|
||||
"comms_synapse_db_port": _env_int("COMMS_SYNAPSE_DB_PORT", 5432),
|
||||
"comms_synapse_db_name": _env("COMMS_SYNAPSE_DB_NAME", "synapse"),
|
||||
"comms_synapse_db_user": _env("COMMS_SYNAPSE_DB_USER", "synapse"),
|
||||
"comms_synapse_db_password": _env("COMMS_SYNAPSE_DB_PASSWORD", ""),
|
||||
"comms_synapse_admin_token": _env("COMMS_SYNAPSE_ADMIN_TOKEN", ""),
|
||||
"comms_timeout_sec": _env_float("COMMS_TIMEOUT_SEC", 30.0),
|
||||
"comms_guest_stale_days": _env_int("COMMS_GUEST_STALE_DAYS", 14),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _image_sweeper_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"image_sweeper_namespace": _env("IMAGE_SWEEPER_NAMESPACE", "maintenance"),
|
||||
"image_sweeper_service_account": _env("IMAGE_SWEEPER_SERVICE_ACCOUNT", "node-image-sweeper"),
|
||||
"image_sweeper_job_ttl_sec": _env_int("IMAGE_SWEEPER_JOB_TTL_SEC", 3600),
|
||||
"image_sweeper_wait_timeout_sec": _env_float("IMAGE_SWEEPER_WAIT_TIMEOUT_SEC", 1200.0),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _platform_quality_probe_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"platform_quality_probe_namespace": _env("PLATFORM_QUALITY_PROBE_NAMESPACE", "monitoring"),
|
||||
"platform_quality_probe_script_configmap": _env(
|
||||
"PLATFORM_QUALITY_PROBE_SCRIPT_CONFIGMAP",
|
||||
"platform-quality-suite-probe-script",
|
||||
),
|
||||
"platform_quality_probe_image": _env("PLATFORM_QUALITY_PROBE_IMAGE", "curlimages/curl:8.12.1"),
|
||||
"platform_quality_probe_job_ttl_sec": _env_int("PLATFORM_QUALITY_PROBE_JOB_TTL_SEC", 1800),
|
||||
"platform_quality_probe_wait_timeout_sec": _env_float("PLATFORM_QUALITY_PROBE_WAIT_TIMEOUT_SEC", 180.0),
|
||||
"platform_quality_probe_pushgateway_url": _env(
|
||||
"PLATFORM_QUALITY_PROBE_PUSHGATEWAY_URL",
|
||||
"http://platform-quality-gateway.monitoring.svc.cluster.local:9091",
|
||||
).rstrip("/"),
|
||||
"platform_quality_probe_http_timeout_sec": _env_int("PLATFORM_QUALITY_PROBE_HTTP_TIMEOUT_SECONDS", 12),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _vaultwarden_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"vaultwarden_namespace": _env("VAULTWARDEN_NAMESPACE", "vaultwarden"),
|
||||
"vaultwarden_pod_label": _env("VAULTWARDEN_POD_LABEL", "app=vaultwarden"),
|
||||
"vaultwarden_pod_port": _env_int("VAULTWARDEN_POD_PORT", 80),
|
||||
"vaultwarden_service_host": _env(
|
||||
"VAULTWARDEN_SERVICE_HOST",
|
||||
"vaultwarden-service.vaultwarden.svc.cluster.local",
|
||||
),
|
||||
"vaultwarden_admin_secret_name": _env("VAULTWARDEN_ADMIN_SECRET_NAME", "vaultwarden-admin"),
|
||||
"vaultwarden_admin_secret_key": _env("VAULTWARDEN_ADMIN_SECRET_KEY", "ADMIN_TOKEN"),
|
||||
"vaultwarden_admin_session_ttl_sec": _env_float("VAULTWARDEN_ADMIN_SESSION_TTL_SEC", 300.0),
|
||||
"vaultwarden_admin_rate_limit_backoff_sec": _env_float("VAULTWARDEN_ADMIN_RATE_LIMIT_BACKOFF_SEC", 600.0),
|
||||
"vaultwarden_retry_cooldown_sec": _env_float("VAULTWARDEN_RETRY_COOLDOWN_SEC", 1800.0),
|
||||
"vaultwarden_failure_bailout": _env_int("VAULTWARDEN_FAILURE_BAILOUT", 2),
|
||||
"vaultwarden_invite_refresh_sec": _env_float("VAULTWARDEN_INVITE_REFRESH_SEC", 86400.0),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _schedule_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"mailu_sync_cron": _env("ARIADNE_SCHEDULE_MAILU_SYNC", "30 4 * * *"),
|
||||
"nextcloud_sync_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_SYNC", "0 5 * * *"),
|
||||
"nextcloud_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_CRON", "*/5 * * * *"),
|
||||
"nextcloud_maintenance_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_MAINTENANCE", "30 4 * * *"),
|
||||
"vaultwarden_sync_cron": _env("ARIADNE_SCHEDULE_VAULTWARDEN_SYNC", "0 * * * *"),
|
||||
"wger_user_sync_cron": _env("ARIADNE_SCHEDULE_WGER_USER_SYNC", "0 5 * * *"),
|
||||
"wger_admin_cron": _env("ARIADNE_SCHEDULE_WGER_ADMIN", "15 3 * * *"),
|
||||
"firefly_user_sync_cron": _env("ARIADNE_SCHEDULE_FIREFLY_USER_SYNC", "0 6 * * *"),
|
||||
"firefly_cron": _env("ARIADNE_SCHEDULE_FIREFLY_CRON", "0 3 * * *"),
|
||||
"pod_cleaner_cron": _env("ARIADNE_SCHEDULE_POD_CLEANER", "0 * * * *"),
|
||||
"opensearch_prune_cron": _env("ARIADNE_SCHEDULE_OPENSEARCH_PRUNE", "23 3 * * *"),
|
||||
"image_sweeper_cron": _env("ARIADNE_SCHEDULE_IMAGE_SWEEPER", "30 4 * * 0"),
|
||||
"vault_k8s_auth_cron": _env("ARIADNE_SCHEDULE_VAULT_K8S_AUTH", "0 * * * *"),
|
||||
"vault_oidc_cron": _env("ARIADNE_SCHEDULE_VAULT_OIDC", "0 * * * *"),
|
||||
"comms_guest_name_cron": _env("ARIADNE_SCHEDULE_COMMS_GUEST_NAME", "*/5 * * * *"),
|
||||
"comms_pin_invite_cron": _env("ARIADNE_SCHEDULE_COMMS_PIN_INVITE", "*/30 * * * *"),
|
||||
"comms_reset_room_cron": _env("ARIADNE_SCHEDULE_COMMS_RESET_ROOM", "0 0 1 1 *"),
|
||||
"comms_seed_room_cron": _env("ARIADNE_SCHEDULE_COMMS_SEED_ROOM", "*/10 * * * *"),
|
||||
"keycloak_profile_cron": _env("ARIADNE_SCHEDULE_KEYCLOAK_PROFILE", "0 */6 * * *"),
|
||||
"metis_k3s_token_sync_cron": _env("ARIADNE_SCHEDULE_METIS_K3S_TOKEN_SYNC", "11 */6 * * *"),
|
||||
"platform_quality_suite_probe_cron": _env(
|
||||
"ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE",
|
||||
"*/15 * * * *",
|
||||
),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _cluster_state_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"vm_url": _env(
|
||||
"ARIADNE_VM_URL",
|
||||
"http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428",
|
||||
).rstrip("/"),
|
||||
"cluster_state_vm_timeout_sec": _env_float("ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC", 5.0),
|
||||
"alertmanager_url": _env("ARIADNE_ALERTMANAGER_URL", "").rstrip("/"),
|
||||
"cluster_state_cron": _env("ARIADNE_SCHEDULE_CLUSTER_STATE", "*/15 * * * *"),
|
||||
"cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _metis_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"metis_base_url": _env("METIS_BASE_URL", "http://metis.maintenance.svc.cluster.local").rstrip("/"),
|
||||
"metis_watch_url": _env("METIS_WATCH_URL", "").rstrip("/"),
|
||||
"metis_timeout_sec": _env_float("METIS_TIMEOUT_SEC", 10.0),
|
||||
"metis_sentinel_watch_cron": _env("ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH", "*/15 * * * *"),
|
||||
"metis_token_sync_namespace": _env("METIS_TOKEN_SYNC_NAMESPACE", "maintenance"),
|
||||
"metis_token_sync_service_account": _env("METIS_TOKEN_SYNC_SERVICE_ACCOUNT", "metis-token-sync"),
|
||||
"metis_token_sync_node_name": _env("METIS_TOKEN_SYNC_NODE_NAME", "titan-0a"),
|
||||
"metis_token_sync_image": _env("METIS_TOKEN_SYNC_IMAGE", "hashicorp/vault:1.17.6"),
|
||||
"metis_token_sync_job_ttl_sec": _env_int("METIS_TOKEN_SYNC_JOB_TTL_SEC", 1800),
|
||||
"metis_token_sync_wait_timeout_sec": _env_float("METIS_TOKEN_SYNC_WAIT_TIMEOUT_SEC", 180.0),
|
||||
"metis_token_sync_vault_addr": _env(
|
||||
"METIS_TOKEN_SYNC_VAULT_ADDR",
|
||||
"http://vault.vault.svc.cluster.local:8200",
|
||||
).rstrip("/"),
|
||||
"metis_token_sync_vault_k8s_role": _env("METIS_TOKEN_SYNC_VAULT_K8S_ROLE", "maintenance-metis-token-sync"),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def _opensearch_config(cls) -> dict[str, Any]:
|
||||
return {
|
||||
"opensearch_url": _env(
|
||||
"OPENSEARCH_URL",
|
||||
"http://opensearch-master.logging.svc.cluster.local:9200",
|
||||
).rstrip("/"),
|
||||
"opensearch_limit_bytes": _env_int("OPENSEARCH_LIMIT_BYTES", 1024**4),
|
||||
"opensearch_index_patterns": _env("OPENSEARCH_INDEX_PATTERNS", "kube-*,journald-*"),
|
||||
"opensearch_timeout_sec": _env_float("OPENSEARCH_TIMEOUT_SEC", 30.0),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "Settings":
|
||||
keycloak_cfg = cls._keycloak_config()
|
||||
portal_cfg = cls._portal_group_config()
|
||||
mailu_cfg = cls._mailu_config()
|
||||
smtp_cfg = cls._smtp_config(mailu_cfg["mailu_domain"])
|
||||
nextcloud_cfg = cls._nextcloud_config()
|
||||
wger_cfg = cls._wger_config()
|
||||
firefly_cfg = cls._firefly_config()
|
||||
vault_cfg = cls._vault_config()
|
||||
comms_cfg = cls._comms_config()
|
||||
image_cfg = cls._image_sweeper_config()
|
||||
platform_quality_probe_cfg = cls._platform_quality_probe_config()
|
||||
vaultwarden_cfg = cls._vaultwarden_config()
|
||||
schedule_cfg = cls._schedule_config()
|
||||
cluster_cfg = cls._cluster_state_config()
|
||||
metis_cfg = cls._metis_config()
|
||||
opensearch_cfg = cls._opensearch_config()
|
||||
keycloak_cfg = _keycloak_config()
|
||||
portal_cfg = _portal_group_config()
|
||||
mailu_cfg = _mailu_config()
|
||||
smtp_cfg = _smtp_config(mailu_cfg["mailu_domain"])
|
||||
nextcloud_cfg = _nextcloud_config()
|
||||
wger_cfg = _wger_config()
|
||||
firefly_cfg = _firefly_config()
|
||||
vault_cfg = _vault_config()
|
||||
comms_cfg = _comms_config()
|
||||
image_cfg = _image_sweeper_config()
|
||||
platform_quality_probe_cfg = _platform_quality_probe_config()
|
||||
jenkins_build_weather_cfg = _jenkins_build_weather_config()
|
||||
jenkins_workspace_cleanup_cfg = _jenkins_workspace_cleanup_config()
|
||||
vaultwarden_cfg = _vaultwarden_config()
|
||||
schedule_cfg = _schedule_config()
|
||||
cluster_cfg = _cluster_state_config()
|
||||
metis_cfg = _metis_config()
|
||||
opensearch_cfg = _opensearch_config()
|
||||
|
||||
portal_db = _env("PORTAL_DATABASE_URL", "")
|
||||
ariadne_db = _env("ARIADNE_DATABASE_URL", portal_db)
|
||||
@ -605,6 +303,8 @@ class Settings:
|
||||
**comms_cfg,
|
||||
**image_cfg,
|
||||
**platform_quality_probe_cfg,
|
||||
**jenkins_build_weather_cfg,
|
||||
**jenkins_workspace_cleanup_cfg,
|
||||
**vaultwarden_cfg,
|
||||
**schedule_cfg,
|
||||
**cluster_cfg,
|
||||
|
||||
28
ariadne/settings_env.py
Normal file
28
ariadne/settings_env.py
Normal file
@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
|
||||
def _env(name: str, default: str = "") -> str:
|
||||
value = os.getenv(name, default)
|
||||
return value.strip() if isinstance(value, str) else default
|
||||
|
||||
|
||||
def _env_bool(name: str, default: str = "false") -> bool:
|
||||
return _env(name, default).lower() in {"1", "true", "yes", "y", "on"}
|
||||
|
||||
|
||||
def _env_int(name: str, default: int) -> int:
|
||||
raw = _env(name, str(default))
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
raw = _env(name, str(default))
|
||||
try:
|
||||
return float(raw)
|
||||
except ValueError:
|
||||
return default
|
||||
343
ariadne/settings_sections.py
Normal file
343
ariadne/settings_sections.py
Normal file
@ -0,0 +1,343 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from .settings_env import _env, _env_bool, _env_float, _env_int
|
||||
|
||||
|
||||
def _keycloak_config() -> dict[str, Any]:
|
||||
keycloak_url = _env("KEYCLOAK_URL", "https://sso.bstein.dev").rstrip("/")
|
||||
keycloak_realm = _env("KEYCLOAK_REALM", "atlas")
|
||||
keycloak_client_id = _env("KEYCLOAK_CLIENT_ID", "bstein-dev-home")
|
||||
keycloak_issuer = _env("KEYCLOAK_ISSUER", f"{keycloak_url}/realms/{keycloak_realm}").rstrip("/")
|
||||
keycloak_jwks_url = _env("KEYCLOAK_JWKS_URL", f"{keycloak_issuer}/protocol/openid-connect/certs").rstrip("/")
|
||||
return {
|
||||
"keycloak_url": keycloak_url,
|
||||
"keycloak_realm": keycloak_realm,
|
||||
"keycloak_client_id": keycloak_client_id,
|
||||
"keycloak_issuer": keycloak_issuer,
|
||||
"keycloak_jwks_url": keycloak_jwks_url,
|
||||
"keycloak_admin_url": _env("KEYCLOAK_ADMIN_URL", keycloak_url).rstrip("/"),
|
||||
"keycloak_admin_realm": _env("KEYCLOAK_ADMIN_REALM", keycloak_realm),
|
||||
"keycloak_admin_client_id": _env("KEYCLOAK_ADMIN_CLIENT_ID", ""),
|
||||
"keycloak_admin_client_secret": _env("KEYCLOAK_ADMIN_CLIENT_SECRET", ""),
|
||||
}
|
||||
|
||||
|
||||
def _portal_group_config() -> dict[str, Any]:
|
||||
return {
|
||||
"portal_admin_users": [u for u in (_env("PORTAL_ADMIN_USERS", "bstein")).split(",") if u.strip()],
|
||||
"portal_admin_groups": [g for g in (_env("PORTAL_ADMIN_GROUPS", "admin")).split(",") if g.strip()],
|
||||
"account_allowed_groups": [g for g in (_env("ACCOUNT_ALLOWED_GROUPS", "dev,admin")).split(",") if g.strip()],
|
||||
"allowed_flag_groups": [g for g in (_env("ALLOWED_FLAG_GROUPS", "demo,test")).split(",") if g.strip()],
|
||||
"default_user_groups": [g for g in (_env("DEFAULT_USER_GROUPS", "dev")).split(",") if g.strip()],
|
||||
}
|
||||
|
||||
|
||||
def _mailu_config() -> dict[str, Any]:
|
||||
mailu_domain = _env("MAILU_DOMAIN", "bstein.dev")
|
||||
return {
|
||||
"mailu_domain": mailu_domain,
|
||||
"mailu_sync_url": _env(
|
||||
"MAILU_SYNC_URL",
|
||||
"http://mailu-sync-listener.mailu-mailserver.svc.cluster.local:8080/events",
|
||||
).rstrip("/"),
|
||||
"mailu_event_min_interval_sec": _env_float("MAILU_EVENT_MIN_INTERVAL_SEC", 10.0),
|
||||
"mailu_sync_wait_timeout_sec": _env_float("MAILU_SYNC_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"mailu_mailbox_wait_timeout_sec": _env_float("MAILU_MAILBOX_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"mailu_db_host": _env("MAILU_DB_HOST", "postgres-service.postgres.svc.cluster.local"),
|
||||
"mailu_db_port": _env_int("MAILU_DB_PORT", 5432),
|
||||
"mailu_db_name": _env("MAILU_DB_NAME", "mailu"),
|
||||
"mailu_db_user": _env("MAILU_DB_USER", "mailu"),
|
||||
"mailu_db_password": _env("MAILU_DB_PASSWORD", ""),
|
||||
"mailu_host": _env("MAILU_HOST", f"mail.{mailu_domain}"),
|
||||
"mailu_default_quota": _env_int("MAILU_DEFAULT_QUOTA", 20000000000),
|
||||
"mailu_system_users": [u for u in _env("MAILU_SYSTEM_USERS", "").split(",") if u.strip()],
|
||||
"mailu_system_password": _env("MAILU_SYSTEM_PASSWORD", ""),
|
||||
}
|
||||
|
||||
|
||||
def _smtp_config(mailu_domain: str) -> dict[str, Any]:
|
||||
return {
|
||||
"smtp_host": _env("SMTP_HOST", ""),
|
||||
"smtp_port": _env_int("SMTP_PORT", 25),
|
||||
"smtp_username": _env("SMTP_USERNAME", ""),
|
||||
"smtp_password": _env("SMTP_PASSWORD", ""),
|
||||
"smtp_starttls": _env_bool("SMTP_STARTTLS", "false"),
|
||||
"smtp_use_tls": _env_bool("SMTP_USE_TLS", "false"),
|
||||
"smtp_from": _env("SMTP_FROM", f"postmaster@{mailu_domain}"),
|
||||
"smtp_timeout_sec": _env_float("SMTP_TIMEOUT_SEC", 10.0),
|
||||
"welcome_email_enabled": _env_bool("WELCOME_EMAIL_ENABLED", "true"),
|
||||
}
|
||||
|
||||
|
||||
def _nextcloud_config() -> dict[str, Any]:
|
||||
return {
|
||||
"nextcloud_namespace": _env("NEXTCLOUD_NAMESPACE", "nextcloud"),
|
||||
"nextcloud_pod_label": _env("NEXTCLOUD_POD_LABEL", "app=nextcloud"),
|
||||
"nextcloud_container": _env("NEXTCLOUD_CONTAINER", "nextcloud"),
|
||||
"nextcloud_exec_timeout_sec": _env_float("NEXTCLOUD_EXEC_TIMEOUT_SEC", 120.0),
|
||||
"nextcloud_db_host": _env("NEXTCLOUD_DB_HOST", "postgres-service.postgres.svc.cluster.local"),
|
||||
"nextcloud_db_port": _env_int("NEXTCLOUD_DB_PORT", 5432),
|
||||
"nextcloud_db_name": _env("NEXTCLOUD_DB_NAME", "nextcloud"),
|
||||
"nextcloud_db_user": _env("NEXTCLOUD_DB_USER", "nextcloud"),
|
||||
"nextcloud_db_password": _env("NEXTCLOUD_DB_PASSWORD", ""),
|
||||
"nextcloud_url": _env("NEXTCLOUD_URL", "https://cloud.bstein.dev").rstrip("/"),
|
||||
"nextcloud_admin_user": _env("NEXTCLOUD_ADMIN_USER", ""),
|
||||
"nextcloud_admin_password": _env("NEXTCLOUD_ADMIN_PASSWORD", ""),
|
||||
}
|
||||
|
||||
|
||||
def _wger_config() -> dict[str, Any]:
|
||||
return {
|
||||
"wger_namespace": _env("WGER_NAMESPACE", "health"),
|
||||
"wger_user_sync_wait_timeout_sec": _env_float("WGER_USER_SYNC_WAIT_TIMEOUT_SEC", 60.0),
|
||||
"wger_pod_label": _env("WGER_POD_LABEL", "app=wger"),
|
||||
"wger_container": _env("WGER_CONTAINER", "wger"),
|
||||
"wger_admin_username": _env("WGER_ADMIN_USERNAME", ""),
|
||||
"wger_admin_password": _env("WGER_ADMIN_PASSWORD", ""),
|
||||
"wger_admin_email": _env("WGER_ADMIN_EMAIL", ""),
|
||||
}
|
||||
|
||||
|
||||
def _firefly_config() -> dict[str, Any]:
|
||||
return {
|
||||
"firefly_namespace": _env("FIREFLY_NAMESPACE", "finance"),
|
||||
"firefly_user_sync_wait_timeout_sec": _env_float("FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC", 90.0),
|
||||
"firefly_pod_label": _env("FIREFLY_POD_LABEL", "app=firefly"),
|
||||
"firefly_container": _env("FIREFLY_CONTAINER", "firefly"),
|
||||
"firefly_cron_base_url": _env(
|
||||
"FIREFLY_CRON_BASE_URL",
|
||||
"http://firefly.finance.svc.cluster.local/api/v1/cron",
|
||||
),
|
||||
"firefly_cron_token": _env("FIREFLY_CRON_TOKEN", ""),
|
||||
"firefly_cron_timeout_sec": _env_float("FIREFLY_CRON_TIMEOUT_SEC", 30.0),
|
||||
}
|
||||
|
||||
|
||||
def _vault_config() -> dict[str, Any]:
|
||||
return {
|
||||
"vault_namespace": _env("VAULT_NAMESPACE", "vault"),
|
||||
"vault_addr": _env("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200").rstrip("/"),
|
||||
"vault_token": _env("VAULT_TOKEN", ""),
|
||||
"vault_k8s_role": _env("VAULT_K8S_ROLE", "vault"),
|
||||
"vault_k8s_role_ttl": _env("VAULT_K8S_ROLE_TTL", "1h"),
|
||||
"vault_k8s_token_reviewer_jwt": _env("VAULT_K8S_TOKEN_REVIEWER_JWT", ""),
|
||||
"vault_k8s_token_reviewer_jwt_file": _env("VAULT_K8S_TOKEN_REVIEWER_JWT_FILE", ""),
|
||||
"vault_oidc_discovery_url": _env("VAULT_OIDC_DISCOVERY_URL", ""),
|
||||
"vault_oidc_client_id": _env("VAULT_OIDC_CLIENT_ID", ""),
|
||||
"vault_oidc_client_secret": _env("VAULT_OIDC_CLIENT_SECRET", ""),
|
||||
"vault_oidc_default_role": _env("VAULT_OIDC_DEFAULT_ROLE", "admin"),
|
||||
"vault_oidc_scopes": _env("VAULT_OIDC_SCOPES", "openid profile email groups"),
|
||||
"vault_oidc_user_claim": _env("VAULT_OIDC_USER_CLAIM", "preferred_username"),
|
||||
"vault_oidc_groups_claim": _env("VAULT_OIDC_GROUPS_CLAIM", "groups"),
|
||||
"vault_oidc_token_policies": _env("VAULT_OIDC_TOKEN_POLICIES", ""),
|
||||
"vault_oidc_admin_group": _env("VAULT_OIDC_ADMIN_GROUP", "admin"),
|
||||
"vault_oidc_admin_policies": _env("VAULT_OIDC_ADMIN_POLICIES", "default,vault-admin"),
|
||||
"vault_oidc_dev_group": _env("VAULT_OIDC_DEV_GROUP", "dev"),
|
||||
"vault_oidc_dev_policies": _env("VAULT_OIDC_DEV_POLICIES", "default,dev-kv"),
|
||||
"vault_oidc_user_group": _env("VAULT_OIDC_USER_GROUP", ""),
|
||||
"vault_oidc_user_policies": _env("VAULT_OIDC_USER_POLICIES", ""),
|
||||
"vault_oidc_redirect_uris": _env(
|
||||
"VAULT_OIDC_REDIRECT_URIS",
|
||||
"https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback",
|
||||
),
|
||||
"vault_oidc_bound_audiences": _env("VAULT_OIDC_BOUND_AUDIENCES", ""),
|
||||
"vault_oidc_bound_claims_type": _env("VAULT_OIDC_BOUND_CLAIMS_TYPE", "string"),
|
||||
}
|
||||
|
||||
|
||||
def _comms_config() -> dict[str, Any]:
|
||||
return {
|
||||
"comms_namespace": _env("COMMS_NAMESPACE", "comms"),
|
||||
"comms_synapse_base": _env(
|
||||
"COMMS_SYNAPSE_BASE",
|
||||
"http://othrys-synapse-matrix-synapse:8008",
|
||||
).rstrip("/"),
|
||||
"comms_auth_base": _env(
|
||||
"COMMS_AUTH_BASE",
|
||||
"http://matrix-authentication-service:8080",
|
||||
).rstrip("/"),
|
||||
"comms_mas_admin_api_base": _env(
|
||||
"COMMS_MAS_ADMIN_API_BASE",
|
||||
"http://matrix-authentication-service:8081/api/admin/v1",
|
||||
).rstrip("/"),
|
||||
"comms_mas_token_url": _env(
|
||||
"COMMS_MAS_TOKEN_URL",
|
||||
"http://matrix-authentication-service:8080/oauth2/token",
|
||||
),
|
||||
"comms_mas_admin_client_id": _env("COMMS_MAS_ADMIN_CLIENT_ID", "01KDXMVQBQ5JNY6SEJPZW6Z8BM"),
|
||||
"comms_mas_admin_client_secret": _env("COMMS_MAS_ADMIN_CLIENT_SECRET", ""),
|
||||
"comms_server_name": _env("COMMS_SERVER_NAME", "live.bstein.dev"),
|
||||
"comms_room_alias": _env("COMMS_ROOM_ALIAS", "#othrys:live.bstein.dev"),
|
||||
"comms_room_name": _env("COMMS_ROOM_NAME", "Othrys"),
|
||||
"comms_pin_message": _env(
|
||||
"COMMS_PIN_MESSAGE",
|
||||
"Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'.",
|
||||
),
|
||||
"comms_seeder_user": _env("COMMS_SEEDER_USER", "othrys-seeder"),
|
||||
"comms_seeder_password": _env("COMMS_SEEDER_PASSWORD", ""),
|
||||
"comms_bot_user": _env("COMMS_BOT_USER", "atlasbot"),
|
||||
"comms_bot_password": _env("COMMS_BOT_PASSWORD", ""),
|
||||
"comms_synapse_db_host": _env(
|
||||
"COMMS_SYNAPSE_DB_HOST",
|
||||
"postgres-service.postgres.svc.cluster.local",
|
||||
),
|
||||
"comms_synapse_db_port": _env_int("COMMS_SYNAPSE_DB_PORT", 5432),
|
||||
"comms_synapse_db_name": _env("COMMS_SYNAPSE_DB_NAME", "synapse"),
|
||||
"comms_synapse_db_user": _env("COMMS_SYNAPSE_DB_USER", "synapse"),
|
||||
"comms_synapse_db_password": _env("COMMS_SYNAPSE_DB_PASSWORD", ""),
|
||||
"comms_synapse_admin_token": _env("COMMS_SYNAPSE_ADMIN_TOKEN", ""),
|
||||
"comms_timeout_sec": _env_float("COMMS_TIMEOUT_SEC", 30.0),
|
||||
"comms_guest_stale_days": _env_int("COMMS_GUEST_STALE_DAYS", 14),
|
||||
}
|
||||
|
||||
|
||||
def _image_sweeper_config() -> dict[str, Any]:
|
||||
return {
|
||||
"image_sweeper_namespace": _env("IMAGE_SWEEPER_NAMESPACE", "maintenance"),
|
||||
"image_sweeper_service_account": _env("IMAGE_SWEEPER_SERVICE_ACCOUNT", "node-image-sweeper"),
|
||||
"image_sweeper_job_ttl_sec": _env_int("IMAGE_SWEEPER_JOB_TTL_SEC", 3600),
|
||||
"image_sweeper_wait_timeout_sec": _env_float("IMAGE_SWEEPER_WAIT_TIMEOUT_SEC", 1200.0),
|
||||
}
|
||||
|
||||
|
||||
def _platform_quality_probe_config() -> dict[str, Any]:
|
||||
return {
|
||||
"platform_quality_probe_namespace": _env("PLATFORM_QUALITY_PROBE_NAMESPACE", "monitoring"),
|
||||
"platform_quality_probe_script_configmap": _env(
|
||||
"PLATFORM_QUALITY_PROBE_SCRIPT_CONFIGMAP",
|
||||
"platform-quality-suite-probe-script",
|
||||
),
|
||||
"platform_quality_probe_image": _env("PLATFORM_QUALITY_PROBE_IMAGE", "curlimages/curl:8.12.1"),
|
||||
"platform_quality_probe_job_ttl_sec": _env_int("PLATFORM_QUALITY_PROBE_JOB_TTL_SEC", 1800),
|
||||
"platform_quality_probe_wait_timeout_sec": _env_float("PLATFORM_QUALITY_PROBE_WAIT_TIMEOUT_SEC", 180.0),
|
||||
"platform_quality_probe_pushgateway_url": _env(
|
||||
"PLATFORM_QUALITY_PROBE_PUSHGATEWAY_URL",
|
||||
"http://platform-quality-gateway.monitoring.svc.cluster.local:9091",
|
||||
).rstrip("/"),
|
||||
"platform_quality_probe_http_timeout_sec": _env_int("PLATFORM_QUALITY_PROBE_HTTP_TIMEOUT_SECONDS", 12),
|
||||
}
|
||||
|
||||
|
||||
def _jenkins_build_weather_config() -> dict[str, Any]:
|
||||
return {
|
||||
"jenkins_base_url": _env("JENKINS_BASE_URL", "https://ci.bstein.dev").rstrip("/"),
|
||||
"jenkins_api_user": _env("JENKINS_API_USER", ""),
|
||||
"jenkins_api_token": _env("JENKINS_API_TOKEN", ""),
|
||||
"jenkins_api_timeout_sec": _env_float("JENKINS_API_TIMEOUT_SEC", 10.0),
|
||||
}
|
||||
|
||||
|
||||
def _jenkins_workspace_cleanup_config() -> dict[str, Any]:
|
||||
return {
|
||||
"jenkins_workspace_namespace": _env("JENKINS_WORKSPACE_NAMESPACE", "jenkins"),
|
||||
"jenkins_workspace_pvc_prefix": _env("JENKINS_WORKSPACE_PVC_PREFIX", "pvc-workspace-"),
|
||||
"jenkins_workspace_cleanup_min_age_hours": _env_float("JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS", 12.0),
|
||||
"jenkins_workspace_cleanup_dry_run": _env_bool("JENKINS_WORKSPACE_CLEANUP_DRY_RUN", "false"),
|
||||
"jenkins_workspace_cleanup_max_deletions_per_run": _env_int(
|
||||
"JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN",
|
||||
20,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _vaultwarden_config() -> dict[str, Any]:
|
||||
return {
|
||||
"vaultwarden_namespace": _env("VAULTWARDEN_NAMESPACE", "vaultwarden"),
|
||||
"vaultwarden_pod_label": _env("VAULTWARDEN_POD_LABEL", "app=vaultwarden"),
|
||||
"vaultwarden_pod_port": _env_int("VAULTWARDEN_POD_PORT", 80),
|
||||
"vaultwarden_service_host": _env(
|
||||
"VAULTWARDEN_SERVICE_HOST",
|
||||
"vaultwarden-service.vaultwarden.svc.cluster.local",
|
||||
),
|
||||
"vaultwarden_admin_secret_name": _env("VAULTWARDEN_ADMIN_SECRET_NAME", "vaultwarden-admin"),
|
||||
"vaultwarden_admin_secret_key": _env("VAULTWARDEN_ADMIN_SECRET_KEY", "ADMIN_TOKEN"),
|
||||
"vaultwarden_admin_session_ttl_sec": _env_float("VAULTWARDEN_ADMIN_SESSION_TTL_SEC", 300.0),
|
||||
"vaultwarden_admin_rate_limit_backoff_sec": _env_float("VAULTWARDEN_ADMIN_RATE_LIMIT_BACKOFF_SEC", 600.0),
|
||||
"vaultwarden_retry_cooldown_sec": _env_float("VAULTWARDEN_RETRY_COOLDOWN_SEC", 1800.0),
|
||||
"vaultwarden_failure_bailout": _env_int("VAULTWARDEN_FAILURE_BAILOUT", 2),
|
||||
"vaultwarden_invite_refresh_sec": _env_float("VAULTWARDEN_INVITE_REFRESH_SEC", 86400.0),
|
||||
}
|
||||
|
||||
|
||||
def _schedule_config() -> dict[str, Any]:
|
||||
return {
|
||||
"mailu_sync_cron": _env("ARIADNE_SCHEDULE_MAILU_SYNC", "30 4 * * *"),
|
||||
"nextcloud_sync_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_SYNC", "0 5 * * *"),
|
||||
"nextcloud_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_CRON", "*/5 * * * *"),
|
||||
"nextcloud_maintenance_cron": _env("ARIADNE_SCHEDULE_NEXTCLOUD_MAINTENANCE", "30 4 * * *"),
|
||||
"vaultwarden_sync_cron": _env("ARIADNE_SCHEDULE_VAULTWARDEN_SYNC", "0 * * * *"),
|
||||
"wger_user_sync_cron": _env("ARIADNE_SCHEDULE_WGER_USER_SYNC", "0 5 * * *"),
|
||||
"wger_admin_cron": _env("ARIADNE_SCHEDULE_WGER_ADMIN", "15 3 * * *"),
|
||||
"firefly_user_sync_cron": _env("ARIADNE_SCHEDULE_FIREFLY_USER_SYNC", "0 6 * * *"),
|
||||
"firefly_cron": _env("ARIADNE_SCHEDULE_FIREFLY_CRON", "0 3 * * *"),
|
||||
"pod_cleaner_cron": _env("ARIADNE_SCHEDULE_POD_CLEANER", "0 * * * *"),
|
||||
"opensearch_prune_cron": _env("ARIADNE_SCHEDULE_OPENSEARCH_PRUNE", "23 3 * * *"),
|
||||
"image_sweeper_cron": _env("ARIADNE_SCHEDULE_IMAGE_SWEEPER", "30 4 * * 0"),
|
||||
"vault_k8s_auth_cron": _env("ARIADNE_SCHEDULE_VAULT_K8S_AUTH", "0 * * * *"),
|
||||
"vault_oidc_cron": _env("ARIADNE_SCHEDULE_VAULT_OIDC", "0 * * * *"),
|
||||
"comms_guest_name_cron": _env("ARIADNE_SCHEDULE_COMMS_GUEST_NAME", "*/5 * * * *"),
|
||||
"comms_pin_invite_cron": _env("ARIADNE_SCHEDULE_COMMS_PIN_INVITE", "*/30 * * * *"),
|
||||
"comms_reset_room_cron": _env("ARIADNE_SCHEDULE_COMMS_RESET_ROOM", "0 0 1 1 *"),
|
||||
"comms_seed_room_cron": _env("ARIADNE_SCHEDULE_COMMS_SEED_ROOM", "*/10 * * * *"),
|
||||
"keycloak_profile_cron": _env("ARIADNE_SCHEDULE_KEYCLOAK_PROFILE", "0 */6 * * *"),
|
||||
"metis_k3s_token_sync_cron": _env("ARIADNE_SCHEDULE_METIS_K3S_TOKEN_SYNC", "11 */6 * * *"),
|
||||
"platform_quality_suite_probe_cron": _env(
|
||||
"ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE",
|
||||
"*/15 * * * *",
|
||||
),
|
||||
"jenkins_build_weather_cron": _env(
|
||||
"ARIADNE_SCHEDULE_JENKINS_BUILD_WEATHER",
|
||||
"*/10 * * * *",
|
||||
),
|
||||
"jenkins_workspace_cleanup_cron": _env(
|
||||
"ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP",
|
||||
"45 */6 * * *",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _cluster_state_config() -> dict[str, Any]:
|
||||
return {
|
||||
"vm_url": _env(
|
||||
"ARIADNE_VM_URL",
|
||||
"http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428",
|
||||
).rstrip("/"),
|
||||
"cluster_state_vm_timeout_sec": _env_float("ARIADNE_CLUSTER_STATE_VM_TIMEOUT_SEC", 5.0),
|
||||
"alertmanager_url": _env("ARIADNE_ALERTMANAGER_URL", "").rstrip("/"),
|
||||
"cluster_state_cron": _env("ARIADNE_SCHEDULE_CLUSTER_STATE", "*/15 * * * *"),
|
||||
"cluster_state_keep": _env_int("ARIADNE_CLUSTER_STATE_KEEP", 168),
|
||||
}
|
||||
|
||||
|
||||
def _metis_config() -> dict[str, Any]:
|
||||
return {
|
||||
"metis_base_url": _env("METIS_BASE_URL", "http://metis.maintenance.svc.cluster.local").rstrip("/"),
|
||||
"metis_watch_url": _env("METIS_WATCH_URL", "").rstrip("/"),
|
||||
"metis_timeout_sec": _env_float("METIS_TIMEOUT_SEC", 10.0),
|
||||
"metis_sentinel_watch_cron": _env("ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH", "*/15 * * * *"),
|
||||
"metis_token_sync_namespace": _env("METIS_TOKEN_SYNC_NAMESPACE", "maintenance"),
|
||||
"metis_token_sync_service_account": _env("METIS_TOKEN_SYNC_SERVICE_ACCOUNT", "metis-token-sync"),
|
||||
"metis_token_sync_node_name": _env("METIS_TOKEN_SYNC_NODE_NAME", "titan-0a"),
|
||||
"metis_token_sync_image": _env("METIS_TOKEN_SYNC_IMAGE", "hashicorp/vault:1.17.6"),
|
||||
"metis_token_sync_job_ttl_sec": _env_int("METIS_TOKEN_SYNC_JOB_TTL_SEC", 1800),
|
||||
"metis_token_sync_wait_timeout_sec": _env_float("METIS_TOKEN_SYNC_WAIT_TIMEOUT_SEC", 180.0),
|
||||
"metis_token_sync_vault_addr": _env(
|
||||
"METIS_TOKEN_SYNC_VAULT_ADDR",
|
||||
"http://vault.vault.svc.cluster.local:8200",
|
||||
).rstrip("/"),
|
||||
"metis_token_sync_vault_k8s_role": _env("METIS_TOKEN_SYNC_VAULT_K8S_ROLE", "maintenance-metis-token-sync"),
|
||||
}
|
||||
|
||||
|
||||
def _opensearch_config() -> dict[str, Any]:
|
||||
return {
|
||||
"opensearch_url": _env(
|
||||
"OPENSEARCH_URL",
|
||||
"http://opensearch-master.logging.svc.cluster.local:9200",
|
||||
).rstrip("/"),
|
||||
"opensearch_limit_bytes": _env_int("OPENSEARCH_LIMIT_BYTES", 1024**4),
|
||||
"opensearch_index_patterns": _env("OPENSEARCH_INDEX_PATTERNS", "kube-*,journald-*"),
|
||||
"opensearch_timeout_sec": _env_float("OPENSEARCH_TIMEOUT_SEC", 30.0),
|
||||
}
|
||||
@ -39,6 +39,8 @@ def _http_error_detail(exc: httpx.HTTPStatusError) -> str:
|
||||
|
||||
|
||||
def safe_error_detail(exc: Exception, fallback: str) -> str:
|
||||
"""Return a user-safe error message without leaking noisy exception internals."""
|
||||
|
||||
runtime_detail = _runtime_error_detail(exc)
|
||||
if runtime_detail:
|
||||
return runtime_detail
|
||||
|
||||
@ -7,6 +7,8 @@ _BEARER_PARTS = 2
|
||||
|
||||
|
||||
def extract_bearer_token(request: Request) -> str | None:
|
||||
"""Extract a Bearer token from a FastAPI request if one is present."""
|
||||
|
||||
header = request.headers.get("Authorization", "")
|
||||
if not header:
|
||||
return None
|
||||
|
||||
@ -42,6 +42,8 @@ class LogConfig:
|
||||
|
||||
|
||||
class JsonFormatter(logging.Formatter):
|
||||
"""Format log records as structured JSON with Ariadne task context."""
|
||||
|
||||
def format(self, record: logging.LogRecord) -> str:
|
||||
payload: dict[str, Any] = {
|
||||
"timestamp": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
|
||||
@ -87,6 +89,8 @@ class _ContextFilter(logging.Filter):
|
||||
|
||||
|
||||
def configure_logging(config: LogConfig | None = None) -> None:
|
||||
"""Configure process-wide JSON logging once for Ariadne services."""
|
||||
|
||||
global _LOGGING_CONFIGURED
|
||||
if _LOGGING_CONFIGURED:
|
||||
return
|
||||
@ -109,11 +113,15 @@ def configure_logging(config: LogConfig | None = None) -> None:
|
||||
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
|
||||
"""Return a named logger using the shared Ariadne logging configuration."""
|
||||
|
||||
return logging.getLogger(name)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def task_context(name: str | None) -> Any:
|
||||
"""Attach a task name to log records emitted inside the context."""
|
||||
|
||||
token = _TASK_NAME.set(name)
|
||||
try:
|
||||
yield
|
||||
|
||||
@ -5,5 +5,7 @@ import string
|
||||
|
||||
|
||||
def random_password(length: int = 32) -> str:
|
||||
"""Generate a random alphanumeric password with the requested length."""
|
||||
|
||||
alphabet = string.ascii_letters + string.digits
|
||||
return "".join(secrets.choice(alphabet) for _ in range(length))
|
||||
|
||||
1
ci/loc_hygiene_waivers.tsv
Normal file
1
ci/loc_hygiene_waivers.tsv
Normal file
@ -0,0 +1 @@
|
||||
# path reason
|
||||
|
@ -2,7 +2,7 @@ fastapi==0.115.11
|
||||
uvicorn[standard]==0.30.6
|
||||
httpx==0.27.2
|
||||
kubernetes==30.1.0
|
||||
PyJWT[crypto]==2.10.1
|
||||
PyJWT[crypto]==2.12.1
|
||||
psycopg[binary]==3.2.6
|
||||
psycopg-pool==3.2.6
|
||||
croniter==2.0.7
|
||||
|
||||
66
scripts/check_coverage_contract.py
Normal file
66
scripts/check_coverage_contract.py
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Enforce Ariadne's per-file source coverage contract."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _source_files(root: Path) -> list[str]:
|
||||
files: list[str] = []
|
||||
for path in sorted(root.rglob("*.py")):
|
||||
if "__pycache__" in path.parts:
|
||||
continue
|
||||
files.append(path.as_posix())
|
||||
return files
|
||||
|
||||
|
||||
def _coverage_percent(file_payload: object) -> float | None:
|
||||
if not isinstance(file_payload, dict):
|
||||
return None
|
||||
summary = file_payload.get("summary")
|
||||
if not isinstance(summary, dict):
|
||||
return None
|
||||
value = summary.get("percent_covered")
|
||||
if isinstance(value, (int, float)):
|
||||
return float(value)
|
||||
return None
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("coverage_json")
|
||||
parser.add_argument("--source-root", default="ariadne")
|
||||
parser.add_argument("--threshold", type=float, default=95.0)
|
||||
args = parser.parse_args()
|
||||
|
||||
coverage_path = Path(args.coverage_json)
|
||||
source_root = Path(args.source_root)
|
||||
payload = json.loads(coverage_path.read_text(encoding="utf-8"))
|
||||
files = payload.get("files") if isinstance(payload, dict) else None
|
||||
if not isinstance(files, dict):
|
||||
print(f"{coverage_path}: missing files coverage map")
|
||||
return 1
|
||||
|
||||
failures: list[str] = []
|
||||
for source_file in _source_files(source_root):
|
||||
percent = _coverage_percent(files.get(source_file))
|
||||
if percent is None:
|
||||
failures.append(f"{source_file}: missing from coverage report")
|
||||
elif percent < args.threshold:
|
||||
failures.append(f"{source_file}: {percent:.2f}% below {args.threshold:.2f}%")
|
||||
|
||||
if failures:
|
||||
print("coverage contract failed:")
|
||||
for failure in failures:
|
||||
print(f" - {failure}")
|
||||
return 1
|
||||
|
||||
print(f"coverage contract passed: {len(_source_files(source_root))} files >= {args.threshold:.2f}%")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
91
scripts/check_docstrings.py
Normal file
91
scripts/check_docstrings.py
Normal file
@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Require docstrings on public production APIs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _is_dataclass_class(node: ast.ClassDef) -> bool:
|
||||
"""Return whether a class uses the dataclass decorator."""
|
||||
|
||||
return any(
|
||||
(isinstance(dec, ast.Name) and dec.id == "dataclass")
|
||||
or (isinstance(dec, ast.Call) and isinstance(dec.func, ast.Name) and dec.func.id == "dataclass")
|
||||
for dec in node.decorator_list
|
||||
)
|
||||
|
||||
|
||||
def _base_names(node: ast.ClassDef) -> set[str]:
|
||||
"""Return simple base class names used by a class definition."""
|
||||
|
||||
return {base.id for base in node.bases if isinstance(base, ast.Name)}
|
||||
|
||||
|
||||
def _needs_function_docstring(node: ast.FunctionDef | ast.AsyncFunctionDef, parent_class: str | None) -> bool:
|
||||
"""Return whether a public function-like node needs a docstring."""
|
||||
|
||||
if node.name.startswith("_") and node.name != "__init__":
|
||||
return False
|
||||
return not (parent_class and node.name.startswith("_"))
|
||||
|
||||
|
||||
def _needs_class_docstring(node: ast.ClassDef) -> bool:
|
||||
"""Return whether a public class-like node needs a docstring."""
|
||||
|
||||
bases = _base_names(node)
|
||||
skipped_bases = {"Exception", "RuntimeError", "BaseException", "BaseModel"}
|
||||
return not (node.name.startswith("_") or _is_dataclass_class(node) or bool(bases.intersection(skipped_bases)))
|
||||
|
||||
|
||||
def _needs_docstring(node: ast.AST, *, parent_class: str | None = None) -> bool:
|
||||
"""Return whether `node` should carry an API contract docstring."""
|
||||
|
||||
if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
return _needs_function_docstring(node, parent_class)
|
||||
if isinstance(node, ast.ClassDef):
|
||||
return _needs_class_docstring(node)
|
||||
return False
|
||||
|
||||
|
||||
def _iter_nodes(tree: ast.AST) -> list[tuple[ast.AST, str | None]]:
|
||||
"""Yield top-level surface area nodes for contract checking."""
|
||||
|
||||
return [(node, None) for node in getattr(tree, "body", [])]
|
||||
|
||||
|
||||
def main() -> int:
|
||||
"""Scan the production package and fail on missing docstrings."""
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--root", default="ariadne")
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
violations: list[str] = []
|
||||
for path in sorted(root.rglob("*.py")):
|
||||
if "__pycache__" in path.parts or ".venv" in path.parts:
|
||||
continue
|
||||
tree = ast.parse(path.read_text(encoding="utf-8"))
|
||||
for node, parent_class in _iter_nodes(tree):
|
||||
if not _needs_docstring(node, parent_class=parent_class):
|
||||
continue
|
||||
if ast.get_docstring(node):
|
||||
continue
|
||||
if isinstance(node, ast.ClassDef):
|
||||
violations.append(f"{path}: class {node.name} is missing a docstring")
|
||||
elif isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
owner = f"{parent_class}." if parent_class else ""
|
||||
violations.append(f"{path}: {owner}{node.name} is missing a docstring")
|
||||
|
||||
if violations:
|
||||
for item in violations:
|
||||
print(item)
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
@ -1,10 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Enforce a ratcheted source file line-budget contract.
|
||||
|
||||
The check fails when:
|
||||
- a file exceeds the configured line budget and is not allowlisted; or
|
||||
- an allowlist entry is stale (file removed or now within budget).
|
||||
"""
|
||||
"""Fail when source files exceed a configured line-count threshold."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@ -12,73 +7,77 @@ import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _iter_source_files(roots: list[str], exts: set[str]) -> list[Path]:
|
||||
files: list[Path] = []
|
||||
for root_text in roots:
|
||||
root = Path(root_text)
|
||||
if not root.exists():
|
||||
continue
|
||||
for path in root.rglob("*"):
|
||||
if not path.is_file():
|
||||
continue
|
||||
if path.suffix not in exts:
|
||||
continue
|
||||
if "__pycache__" in path.parts or ".venv" in path.parts:
|
||||
continue
|
||||
files.append(path.resolve())
|
||||
return sorted(files)
|
||||
DEFAULT_SKIP_PARTS = {
|
||||
".git",
|
||||
".venv",
|
||||
"venv",
|
||||
"build",
|
||||
"dist",
|
||||
"node_modules",
|
||||
"__pycache__",
|
||||
".pytest_cache",
|
||||
}
|
||||
SOURCE_SUFFIXES = {".py", ".sh", ".json", ".yaml", ".yml"}
|
||||
|
||||
|
||||
def _load_waivers(path: Path) -> dict[str, str]:
|
||||
waivers: dict[str, str] = {}
|
||||
def _read_waivers(path: Path) -> set[str]:
|
||||
if not path.exists():
|
||||
return waivers
|
||||
for raw_line in path.read_text(encoding="utf-8").splitlines():
|
||||
line = raw_line.strip()
|
||||
if not line or line.startswith("#"):
|
||||
return set()
|
||||
waived: set[str] = set()
|
||||
for line in path.read_text(encoding="utf-8").splitlines():
|
||||
row = line.strip()
|
||||
if not row or row.startswith("#"):
|
||||
continue
|
||||
parts = line.split("\t")
|
||||
rel_path = parts[0].strip()
|
||||
reason = parts[1].strip() if len(parts) > 1 else ""
|
||||
if rel_path:
|
||||
waivers[rel_path] = reason
|
||||
return waivers
|
||||
waived.add(row.split("\t", 1)[0].strip())
|
||||
return waived
|
||||
|
||||
|
||||
def _iter_files(root: Path) -> list[Path]:
|
||||
if not root.exists():
|
||||
return []
|
||||
files: list[Path] = []
|
||||
for path in root.rglob("*"):
|
||||
if not path.is_file():
|
||||
continue
|
||||
if any(part in DEFAULT_SKIP_PARTS for part in path.parts):
|
||||
continue
|
||||
if path.suffix.lower() not in SOURCE_SUFFIXES and path.name != "Jenkinsfile":
|
||||
continue
|
||||
files.append(path)
|
||||
return files
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--roots", nargs="+", default=["ariadne", "scripts", "tests"])
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--roots", nargs="+", required=True)
|
||||
parser.add_argument("--max-lines", type=int, default=500)
|
||||
parser.add_argument("--waivers", default="scripts/loc_hygiene_waivers.tsv")
|
||||
parser.add_argument("--waivers", default="ci/loc_hygiene_waivers.tsv")
|
||||
args = parser.parse_args()
|
||||
|
||||
repo_root = Path.cwd().resolve()
|
||||
waivers = _load_waivers(repo_root / args.waivers)
|
||||
source_files = _iter_source_files(args.roots, {".py", ".sh"})
|
||||
waived = _read_waivers(repo_root / args.waivers)
|
||||
|
||||
violations: dict[str, int] = {}
|
||||
for path in source_files:
|
||||
rel = path.relative_to(repo_root).as_posix()
|
||||
lines = len(path.read_text(encoding="utf-8", errors="ignore").splitlines())
|
||||
if lines > args.max_lines:
|
||||
violations[rel] = lines
|
||||
offenders: list[tuple[int, str]] = []
|
||||
for root_name in args.roots:
|
||||
for path in _iter_files(repo_root / root_name):
|
||||
rel = path.relative_to(repo_root).as_posix()
|
||||
if rel in waived:
|
||||
continue
|
||||
try:
|
||||
line_count = sum(1 for _ in path.open("r", encoding="utf-8", errors="ignore"))
|
||||
except OSError:
|
||||
continue
|
||||
if line_count > args.max_lines:
|
||||
offenders.append((line_count, rel))
|
||||
|
||||
unexpected = sorted(rel for rel in violations if rel not in waivers)
|
||||
stale = sorted(rel for rel in waivers if rel not in violations)
|
||||
if not unexpected and not stale:
|
||||
print(
|
||||
f"[hygiene] source line budget check passed (limit={args.max_lines}, over_limit={len(violations)}, waivers={len(waivers)})"
|
||||
)
|
||||
if not offenders:
|
||||
print(f"[loc] ok: no files exceed {args.max_lines} lines")
|
||||
return 0
|
||||
|
||||
if unexpected:
|
||||
print("[hygiene] files over budget missing from waiver list:")
|
||||
for rel in unexpected:
|
||||
print(f"- {rel}: {violations[rel]} lines (limit {args.max_lines})")
|
||||
if stale:
|
||||
print("[hygiene] stale waiver entries (remove from waiver list):")
|
||||
for rel in stale:
|
||||
print(f"- {rel}")
|
||||
offenders.sort(reverse=True)
|
||||
print(f"[loc] failed: {len(offenders)} file(s) exceed {args.max_lines} lines")
|
||||
for lines, rel in offenders:
|
||||
print(f" - {rel}: {lines} lines")
|
||||
return 1
|
||||
|
||||
|
||||
|
||||
@ -1,14 +0,0 @@
|
||||
# relative_path<TAB>why_it_is_allowlisted_for_now
|
||||
ariadne/app.py core application router/orchestration pending decomposition
|
||||
ariadne/manager/provisioning.py provisioning workflow hub pending modular extraction
|
||||
ariadne/services/cluster_state.py legacy cluster-state monolith pending split (tracked by branch scope)
|
||||
ariadne/services/comms.py legacy comms monolith pending split by concern
|
||||
ariadne/services/firefly.py firefly integration handlers pending endpoint split
|
||||
ariadne/services/nextcloud.py nextcloud integration surface pending staged decomposition
|
||||
ariadne/services/vault.py vault integration flow pending dedicated auth/storage modules
|
||||
ariadne/services/wger.py wger integration flow pending endpoint-layer split
|
||||
ariadne/settings.py configuration map pending domain-specific config modules
|
||||
tests/test_app.py broad integration assertions pending test-suite decomposition
|
||||
tests/test_keycloak_admin.py keycloak contract tests pending helper extraction
|
||||
tests/test_provisioning.py provisioning matrix tests pending split by workflow phase
|
||||
tests/test_services.py service integration matrix pending split by service domain
|
||||
|
Can't render this file because it has a wrong number of fields in line 2.
|
@ -10,8 +10,12 @@ import sys
|
||||
import urllib.request
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
SOURCE_SCAN_ROOTS = ("ariadne", "scripts", "tests")
|
||||
HTTP_BAD_REQUEST = 400
|
||||
MIN_METRIC_FIELDS = 2
|
||||
SOURCE_SCAN_ROOTS = ("ariadne", "scripts", "testing")
|
||||
SOURCE_EXTENSIONS = {".py", ".sh"}
|
||||
QUALITY_SUCCESS_STATES = {"ok", "pass", "passed", "success", "compliant"}
|
||||
COVERAGE_GATE_TARGET_PERCENT = 95.0
|
||||
|
||||
|
||||
def _escape_label(value: str) -> str:
|
||||
@ -61,6 +65,37 @@ def _load_junit(path: str) -> dict[str, int]:
|
||||
return totals
|
||||
|
||||
|
||||
def _load_junit_cases(path: str) -> list[tuple[str, str]]:
|
||||
tree = ET.parse(path)
|
||||
root = tree.getroot()
|
||||
|
||||
suites: list[ET.Element]
|
||||
if root.tag == "testsuite":
|
||||
suites = [root]
|
||||
elif root.tag == "testsuites":
|
||||
suites = list(root.findall("testsuite"))
|
||||
else:
|
||||
suites = []
|
||||
|
||||
cases: list[tuple[str, str]] = []
|
||||
for suite in suites:
|
||||
for case in suite.findall("testcase"):
|
||||
name = (case.attrib.get("name") or "").strip()
|
||||
classname = (case.attrib.get("classname") or "").strip()
|
||||
if not name:
|
||||
continue
|
||||
test_id = f"{classname}::{name}" if classname else name
|
||||
status = "passed"
|
||||
if case.find("failure") is not None:
|
||||
status = "failed"
|
||||
elif case.find("error") is not None:
|
||||
status = "error"
|
||||
elif case.find("skipped") is not None:
|
||||
status = "skipped"
|
||||
cases.append((test_id, status))
|
||||
return cases
|
||||
|
||||
|
||||
def _read_http(url: str) -> str:
|
||||
try:
|
||||
with urllib.request.urlopen(url, timeout=10) as resp:
|
||||
@ -73,11 +108,11 @@ def _post_text(url: str, payload: str) -> None:
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
data=payload.encode("utf-8"),
|
||||
method="POST",
|
||||
method="PUT",
|
||||
headers={"Content-Type": "text/plain"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
if resp.status >= 400:
|
||||
if resp.status >= HTTP_BAD_REQUEST:
|
||||
raise RuntimeError(f"metrics push failed status={resp.status}")
|
||||
|
||||
|
||||
@ -92,7 +127,7 @@ def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str,
|
||||
if any(f'{k}="{v}"' not in line for k, v in labels.items()):
|
||||
continue
|
||||
parts = line.split()
|
||||
if len(parts) < 2:
|
||||
if len(parts) < MIN_METRIC_FIELDS:
|
||||
continue
|
||||
try:
|
||||
return float(parts[1])
|
||||
@ -118,31 +153,120 @@ def _count_source_files_over_limit(repo_root: Path, max_lines: int = 500) -> int
|
||||
return count
|
||||
|
||||
|
||||
def _load_gate_rc(path: Path) -> int | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
raw = path.read_text(encoding="utf-8").strip()
|
||||
if not raw:
|
||||
return None
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict | None:
|
||||
if not path.exists():
|
||||
return None
|
||||
try:
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
return None
|
||||
return payload if isinstance(payload, dict) else None
|
||||
|
||||
|
||||
def _sonarqube_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", str(build_dir / "sonarqube-quality-gate.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
status_candidates = [
|
||||
report.get("status"),
|
||||
((report.get("projectStatus") or {}).get("status") if isinstance(report.get("projectStatus"), dict) else None),
|
||||
((report.get("qualityGate") or {}).get("status") if isinstance(report.get("qualityGate"), dict) else None),
|
||||
]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _supply_chain_check_status(build_dir: Path) -> str:
|
||||
report = _load_json(Path(os.getenv("QUALITY_GATE_IRONBANK_REPORT", str(build_dir / "ironbank-compliance.json"))))
|
||||
if not report:
|
||||
return "not_applicable"
|
||||
compliant = report.get("compliant")
|
||||
if isinstance(compliant, bool):
|
||||
return "ok" if compliant else "failed"
|
||||
status_candidates = [report.get("status"), report.get("result"), report.get("compliance")]
|
||||
for value in status_candidates:
|
||||
if isinstance(value, str):
|
||||
return "ok" if value.strip().lower() in QUALITY_SUCCESS_STATES else "failed"
|
||||
return "failed"
|
||||
|
||||
|
||||
def _resolve_artifact_paths(repo_root: Path) -> tuple[Path, Path]:
|
||||
"""Find coverage and JUnit artifacts even when a test runner uses fallback names."""
|
||||
|
||||
coverage_path = Path(os.getenv("COVERAGE_JSON", "build/coverage.json"))
|
||||
junit_path = Path(os.getenv("JUNIT_XML", "build/junit.xml"))
|
||||
if not coverage_path.exists():
|
||||
for candidate in (
|
||||
repo_root / "build" / "coverage.json",
|
||||
repo_root / "build" / "coverage-summary.json",
|
||||
repo_root / "build" / "coverage" / "coverage-summary.json",
|
||||
):
|
||||
if candidate.exists():
|
||||
coverage_path = candidate
|
||||
break
|
||||
if not junit_path.exists():
|
||||
junit_candidates = sorted((repo_root / "build").glob("junit*.xml"))
|
||||
if junit_candidates:
|
||||
junit_path = junit_candidates[0]
|
||||
return coverage_path, junit_path
|
||||
|
||||
|
||||
def main() -> int:
|
||||
repo_root = Path(__file__).resolve().parents[1]
|
||||
coverage_path = os.getenv("COVERAGE_JSON", "build/coverage.json")
|
||||
junit_path = os.getenv("JUNIT_XML", "build/junit.xml")
|
||||
build_dir = repo_root / "build"
|
||||
coverage_path, junit_path = _resolve_artifact_paths(repo_root)
|
||||
pushgateway_url = os.getenv(
|
||||
"PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"
|
||||
).strip()
|
||||
suite = os.getenv("SUITE_NAME", "ariadne")
|
||||
branch = os.getenv("BRANCH_NAME", "")
|
||||
branch = os.getenv("BRANCH_NAME") or os.getenv("GIT_BRANCH") or "unknown"
|
||||
if branch.startswith("origin/"):
|
||||
branch = branch[len("origin/") :]
|
||||
build_number = os.getenv("BUILD_NUMBER", "")
|
||||
jenkins_job = os.getenv("JOB_NAME", "ariadne")
|
||||
commit = os.getenv("GIT_COMMIT", "")
|
||||
|
||||
if not os.path.exists(coverage_path):
|
||||
raise RuntimeError(f"missing coverage file {coverage_path}")
|
||||
if not os.path.exists(junit_path):
|
||||
raise RuntimeError(f"missing junit file {junit_path}")
|
||||
print(f"[metrics] coverage_path={coverage_path} exists={coverage_path.exists()}")
|
||||
print(f"[metrics] junit_path={junit_path} exists={junit_path.exists()}")
|
||||
|
||||
coverage = _load_coverage(coverage_path)
|
||||
coverage = 0.0
|
||||
if coverage_path.exists():
|
||||
coverage = _load_coverage(str(coverage_path))
|
||||
docs_gate_rc = _load_gate_rc(Path(os.getenv("QUALITY_GATE_DOCS_RC_PATH", str(build_dir / "docs-naming.rc"))))
|
||||
source_lines_over_500 = _count_source_files_over_limit(repo_root, max_lines=500)
|
||||
totals = _load_junit(junit_path)
|
||||
totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
|
||||
test_cases: list[tuple[str, str]] = []
|
||||
if junit_path.exists():
|
||||
totals = _load_junit(str(junit_path))
|
||||
test_cases = _load_junit_cases(str(junit_path))
|
||||
passed = max(totals["tests"] - totals["failures"] - totals["errors"] - totals["skipped"], 0)
|
||||
|
||||
outcome = "ok"
|
||||
if totals["tests"] <= 0 or totals["failures"] > 0 or totals["errors"] > 0:
|
||||
outcome = "failed"
|
||||
checks = {
|
||||
"tests": "ok" if outcome == "ok" else "failed",
|
||||
"coverage": "ok" if coverage >= COVERAGE_GATE_TARGET_PERCENT else "failed",
|
||||
"loc": "ok" if source_lines_over_500 == 0 else "failed",
|
||||
"docs_naming": "ok" if docs_gate_rc == 0 else "failed",
|
||||
"gate_glue": "ok",
|
||||
"sonarqube": _sonarqube_check_status(build_dir),
|
||||
"supply_chain": _supply_chain_check_status(build_dir),
|
||||
}
|
||||
|
||||
job_name = "platform-quality-ci"
|
||||
ok_count = _fetch_existing_counter(
|
||||
@ -164,8 +288,15 @@ def main() -> int:
|
||||
"suite": suite,
|
||||
"branch": branch,
|
||||
"build_number": build_number,
|
||||
"jenkins_job": jenkins_job,
|
||||
"commit": commit,
|
||||
}
|
||||
test_case_base_labels = {
|
||||
"suite": suite,
|
||||
"branch": branch,
|
||||
"build_number": build_number or "unknown",
|
||||
"jenkins_job": jenkins_job,
|
||||
}
|
||||
payload_lines = [
|
||||
"# TYPE platform_quality_gate_runs_total counter",
|
||||
f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count:.0f}',
|
||||
@ -181,9 +312,26 @@ def main() -> int:
|
||||
f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {coverage:.3f}',
|
||||
"# TYPE platform_quality_gate_source_lines_over_500_total gauge",
|
||||
f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
|
||||
"# TYPE platform_quality_gate_build_info gauge",
|
||||
f"platform_quality_gate_build_info{_label_str(labels)} 1",
|
||||
"# TYPE ariadne_quality_gate_checks_total gauge",
|
||||
"# TYPE platform_quality_gate_test_case_result gauge",
|
||||
"# TYPE ariadne_quality_gate_build_info gauge",
|
||||
f"ariadne_quality_gate_build_info{_label_str(labels)} 1",
|
||||
]
|
||||
if test_cases:
|
||||
payload_lines.extend(
|
||||
f"platform_quality_gate_test_case_result{_label_str({**test_case_base_labels, 'test': test_name, 'status': test_status})} 1"
|
||||
for test_name, test_status in test_cases
|
||||
)
|
||||
else:
|
||||
payload_lines.append(
|
||||
f"platform_quality_gate_test_case_result{_label_str({**test_case_base_labels, 'test': '__no_test_cases__', 'status': 'skipped'})} 1"
|
||||
)
|
||||
payload_lines.extend(
|
||||
f'ariadne_quality_gate_checks_total{{suite="{suite}",check="{check_name}",result="{check_status}"}} 1'
|
||||
for check_name, check_status in checks.items()
|
||||
)
|
||||
payload = "\n".join(payload_lines) + "\n"
|
||||
_post_text(f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}", payload)
|
||||
|
||||
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
1003
tests/test_app.py
1003
tests/test_app.py
File diff suppressed because it is too large
Load Diff
@ -20,7 +20,7 @@ def test_keycloak_verify_accepts_matching_audience(monkeypatch) -> None:
|
||||
kc = KeycloakOIDC("https://jwks", "https://issuer", "portal")
|
||||
|
||||
monkeypatch.setattr(kc, "_get_jwks", lambda force=False: {"keys": [{"kid": "test"}]})
|
||||
monkeypatch.setattr(jwt.algorithms.RSAAlgorithm, "from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(kc, "_key_from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(
|
||||
jwt,
|
||||
"decode",
|
||||
@ -36,7 +36,7 @@ def test_keycloak_verify_rejects_wrong_audience(monkeypatch) -> None:
|
||||
kc = KeycloakOIDC("https://jwks", "https://issuer", "portal")
|
||||
|
||||
monkeypatch.setattr(kc, "_get_jwks", lambda force=False: {"keys": [{"kid": "test"}]})
|
||||
monkeypatch.setattr(jwt.algorithms.RSAAlgorithm, "from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(kc, "_key_from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(
|
||||
jwt,
|
||||
"decode",
|
||||
@ -73,7 +73,7 @@ def test_keycloak_verify_refreshes_jwks(monkeypatch) -> None:
|
||||
return {"keys": [{"kid": "test"}]}
|
||||
|
||||
monkeypatch.setattr(kc, "_get_jwks", fake_get_jwks)
|
||||
monkeypatch.setattr(jwt.algorithms.RSAAlgorithm, "from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(kc, "_key_from_jwk", lambda key: "dummy")
|
||||
monkeypatch.setattr(
|
||||
jwt,
|
||||
"decode",
|
||||
|
||||
@ -98,6 +98,25 @@ def test_migrate_ignores_timeout_errors(monkeypatch) -> None:
|
||||
db.migrate(lock_id=123)
|
||||
|
||||
|
||||
def test_migrate_stops_when_dict_lock_is_unavailable(monkeypatch) -> None:
|
||||
class DictLockConn(DummyConn):
|
||||
def execute(self, query, params=None):
|
||||
if "pg_try_advisory_lock" in query:
|
||||
return DummyResult(row={"pg_try_advisory_lock": False})
|
||||
return super().execute(query, params)
|
||||
|
||||
class DictLockPool(DummyPool):
|
||||
def __init__(self, conninfo=None, min_size=None, max_size=None, kwargs=None):
|
||||
self.conn = DictLockConn()
|
||||
|
||||
monkeypatch.setattr(db_module, "ConnectionPool", DictLockPool)
|
||||
db = Database("postgresql://user:pass@localhost/db")
|
||||
|
||||
db.migrate(lock_id=123)
|
||||
|
||||
assert not any("pg_advisory_unlock" in query for query, _params in db._pool.conn.executed)
|
||||
|
||||
|
||||
def test_migrate_handles_lock_on_alter(monkeypatch) -> None:
|
||||
class LockConn(DummyConn):
|
||||
def execute(self, query, params=None):
|
||||
@ -114,6 +133,46 @@ def test_migrate_handles_lock_on_alter(monkeypatch) -> None:
|
||||
db.migrate(lock_id=123)
|
||||
|
||||
|
||||
def test_migrate_skips_missing_access_request_table(monkeypatch) -> None:
|
||||
class MissingAccessRequestsConn(DummyConn):
|
||||
def execute(self, query, params=None):
|
||||
if "ALTER TABLE access_requests" in query:
|
||||
self.executed.append((query, params))
|
||||
raise db_module.psycopg.errors.UndefinedTable()
|
||||
return super().execute(query, params)
|
||||
|
||||
class MissingAccessRequestsPool(DummyPool):
|
||||
def __init__(self, conninfo=None, min_size=None, max_size=None, kwargs=None):
|
||||
self.conn = MissingAccessRequestsConn()
|
||||
|
||||
monkeypatch.setattr(db_module, "ConnectionPool", MissingAccessRequestsPool)
|
||||
db = Database("postgresql://user:pass@localhost/db")
|
||||
|
||||
db.migrate(lock_id=123, include_ariadne_tables=False)
|
||||
|
||||
assert any("ALTER TABLE access_requests" in query for query, _params in db._pool.conn.executed)
|
||||
|
||||
|
||||
def test_migrate_ignores_unlock_failures(monkeypatch) -> None:
|
||||
class UnlockFailureConn(DummyConn):
|
||||
def execute(self, query, params=None):
|
||||
if "pg_advisory_unlock" in query:
|
||||
self.executed.append((query, params))
|
||||
raise RuntimeError("unlock connection closed")
|
||||
return super().execute(query, params)
|
||||
|
||||
class UnlockFailurePool(DummyPool):
|
||||
def __init__(self, conninfo=None, min_size=None, max_size=None, kwargs=None):
|
||||
self.conn = UnlockFailureConn()
|
||||
|
||||
monkeypatch.setattr(db_module, "ConnectionPool", UnlockFailurePool)
|
||||
db = Database("postgresql://user:pass@localhost/db")
|
||||
|
||||
db.migrate(lock_id=123, include_ariadne_tables=False, include_access_requests=False)
|
||||
|
||||
assert any("pg_advisory_unlock" in query for query, _params in db._pool.conn.executed)
|
||||
|
||||
|
||||
def test_fetchone_and_fetchall_return_dicts(monkeypatch) -> None:
|
||||
class RowConn(DummyConn):
|
||||
def execute(self, query, params=None):
|
||||
|
||||
325
tests/test_jenkins_build_weather.py
Normal file
325
tests/test_jenkins_build_weather.py
Normal file
@ -0,0 +1,325 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import types
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from prometheus_client import REGISTRY
|
||||
|
||||
from ariadne.services import jenkins_build_weather as weather_module
|
||||
|
||||
|
||||
class _DummyResponse:
|
||||
def __init__(self, payload: dict[str, object], status_code: int = 200) -> None:
|
||||
self._payload = payload
|
||||
self.status_code = status_code
|
||||
|
||||
def raise_for_status(self) -> None:
|
||||
if self.status_code >= 400:
|
||||
request = httpx.Request("GET", "https://ci.bstein.dev/api/json")
|
||||
response = httpx.Response(self.status_code, request=request)
|
||||
raise httpx.HTTPStatusError("boom", request=request, response=response)
|
||||
|
||||
def json(self) -> dict[str, object]:
|
||||
return self._payload
|
||||
|
||||
|
||||
class _DummyClient:
|
||||
def __init__(self, payload: dict[str, object]) -> None:
|
||||
self._payload = payload
|
||||
self.called = False
|
||||
|
||||
def __enter__(self) -> _DummyClient:
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb) -> bool:
|
||||
return False
|
||||
|
||||
def get(self, url: str, params: dict[str, str] | None = None) -> _DummyResponse:
|
||||
self.called = True
|
||||
assert url == "https://ci.bstein.dev/api/json"
|
||||
assert isinstance(params, dict)
|
||||
assert "tree" in params
|
||||
return _DummyResponse(self._payload)
|
||||
|
||||
|
||||
def _metric_value(name: str, labels: dict[str, str] | None = None) -> float | None:
|
||||
value = REGISTRY.get_sample_value(name, labels or {})
|
||||
return float(value) if value is not None else None
|
||||
|
||||
|
||||
def _dummy_settings(base_url: str = "https://ci.bstein.dev") -> types.SimpleNamespace:
|
||||
return types.SimpleNamespace(
|
||||
jenkins_base_url=base_url,
|
||||
jenkins_api_user="",
|
||||
jenkins_api_token="",
|
||||
jenkins_api_timeout_sec=5.0,
|
||||
)
|
||||
|
||||
|
||||
def test_collect_jenkins_build_weather_records_metrics(monkeypatch) -> None:
|
||||
weather_module._JOB_SERIES = set()
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings())
|
||||
|
||||
payload = {
|
||||
"jobs": [
|
||||
{
|
||||
"name": "ariadne",
|
||||
"url": "https://ci.bstein.dev/job/ariadne/",
|
||||
"color": "blue",
|
||||
"healthReport": [{"score": 93}],
|
||||
"lastBuild": {"result": "SUCCESS", "timestamp": 1713000000000, "duration": 186000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1713000000000},
|
||||
"lastFailedBuild": {"timestamp": 1712000000000},
|
||||
},
|
||||
{
|
||||
"name": "titan-iac",
|
||||
"url": "https://ci.bstein.dev/job/titan-iac/",
|
||||
"color": "red",
|
||||
"healthReport": [{"score": 11}],
|
||||
"lastBuild": {"result": "FAILURE", "timestamp": 1712990000000, "duration": 126000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1711000000000},
|
||||
"lastFailedBuild": {"timestamp": 1712990000000},
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
monkeypatch.setattr(weather_module.httpx, "Client", lambda **_kwargs: _DummyClient(payload))
|
||||
|
||||
before = _metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "ok"}) or 0.0
|
||||
summary = weather_module.collect_jenkins_build_weather()
|
||||
|
||||
assert summary.jobs_total == 2
|
||||
assert summary.success_total == 1
|
||||
assert summary.failure_total == 1
|
||||
assert summary.running_total == 0
|
||||
assert summary.unknown_total == 0
|
||||
|
||||
assert (_metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "ok"}) or 0.0) == before + 1
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_build_weather_job_last_status",
|
||||
{
|
||||
"job": "ariadne",
|
||||
"job_url": "https://ci.bstein.dev/job/ariadne/",
|
||||
"weather_icon": "☀️",
|
||||
},
|
||||
) == 1.0
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_build_weather_job_last_status",
|
||||
{
|
||||
"job": "titan-iac",
|
||||
"job_url": "https://ci.bstein.dev/job/titan-iac/",
|
||||
"weather_icon": "⛈️",
|
||||
},
|
||||
) == 0.0
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_build_weather_job_last_duration_seconds",
|
||||
{
|
||||
"job": "ariadne",
|
||||
"job_url": "https://ci.bstein.dev/job/ariadne/",
|
||||
"weather_icon": "☀️",
|
||||
},
|
||||
) == 186.0
|
||||
|
||||
|
||||
def test_collect_jenkins_build_weather_removes_deleted_job_series(monkeypatch) -> None:
|
||||
weather_module._JOB_SERIES = set()
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings())
|
||||
|
||||
first_payload = {
|
||||
"jobs": [
|
||||
{
|
||||
"name": "ariadne",
|
||||
"url": "https://ci.bstein.dev/job/ariadne/",
|
||||
"color": "blue",
|
||||
"healthReport": [{"score": 90}],
|
||||
"lastBuild": {"result": "SUCCESS", "timestamp": 1713000000000, "duration": 186000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1713000000000},
|
||||
"lastFailedBuild": {"timestamp": 1712000000000},
|
||||
},
|
||||
{
|
||||
"name": "pegasus",
|
||||
"url": "https://ci.bstein.dev/job/pegasus/",
|
||||
"color": "yellow",
|
||||
"healthReport": [{"score": 50}],
|
||||
"lastBuild": {"result": "FAILURE", "timestamp": 1712980000000, "duration": 120000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1710000000000},
|
||||
"lastFailedBuild": {"timestamp": 1712980000000},
|
||||
},
|
||||
]
|
||||
}
|
||||
second_payload = {
|
||||
"jobs": [
|
||||
{
|
||||
"name": "ariadne",
|
||||
"url": "https://ci.bstein.dev/job/ariadne/",
|
||||
"color": "blue",
|
||||
"healthReport": [{"score": 90}],
|
||||
"lastBuild": {"result": "SUCCESS", "timestamp": 1713010000000, "duration": 184000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1713010000000},
|
||||
"lastFailedBuild": {"timestamp": 1712000000000},
|
||||
}
|
||||
]
|
||||
}
|
||||
payloads = [first_payload, second_payload]
|
||||
|
||||
monkeypatch.setattr(
|
||||
weather_module.httpx,
|
||||
"Client",
|
||||
lambda **_kwargs: _DummyClient(payloads.pop(0)),
|
||||
)
|
||||
|
||||
weather_module.collect_jenkins_build_weather()
|
||||
weather_module.collect_jenkins_build_weather()
|
||||
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_build_weather_job_last_status",
|
||||
{
|
||||
"job": "pegasus",
|
||||
"job_url": "https://ci.bstein.dev/job/pegasus/",
|
||||
"weather_icon": "☁️",
|
||||
},
|
||||
) is None
|
||||
|
||||
|
||||
def test_collect_jenkins_build_weather_skips_when_base_url_empty(monkeypatch) -> None:
|
||||
weather_module._JOB_SERIES = set()
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings(base_url=""))
|
||||
|
||||
before = _metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "skipped"}) or 0.0
|
||||
summary = weather_module.collect_jenkins_build_weather()
|
||||
|
||||
assert summary.jobs_total == 0
|
||||
assert (_metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "skipped"}) or 0.0) == before + 1
|
||||
|
||||
|
||||
def test_fetch_jobs_flattens_folder_jobs(monkeypatch) -> None:
|
||||
weather_module._JOB_SERIES = set()
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings())
|
||||
|
||||
payload = {
|
||||
"jobs": [
|
||||
{
|
||||
"name": "folder",
|
||||
"url": "https://ci.bstein.dev/job/folder/",
|
||||
"jobs": [
|
||||
{
|
||||
"name": "child",
|
||||
"url": "https://ci.bstein.dev/job/folder/job/child/",
|
||||
"color": "blue",
|
||||
"healthReport": [{"score": 100}],
|
||||
"lastBuild": {"result": "SUCCESS", "timestamp": 1713000000000, "duration": 1000},
|
||||
"lastSuccessfulBuild": {"timestamp": 1713000000000},
|
||||
"lastFailedBuild": {"timestamp": 1712000000000},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
monkeypatch.setattr(weather_module.httpx, "Client", lambda **_kwargs: _DummyClient(payload))
|
||||
|
||||
jobs = weather_module._fetch_jobs()
|
||||
assert len(jobs) == 1
|
||||
assert jobs[0].job == "folder/child"
|
||||
assert jobs[0].status == "success"
|
||||
assert jobs[0].last_duration_seconds == 1.0
|
||||
assert datetime.fromtimestamp(jobs[0].last_run_ts, tz=timezone.utc).year == 2024
|
||||
|
||||
|
||||
def test_weather_helper_edges(monkeypatch) -> None:
|
||||
assert weather_module._metric_number(True) == 0.0
|
||||
assert weather_module._metric_number(object()) == 0.0
|
||||
assert weather_module._millis_to_seconds(0) == 0.0
|
||||
|
||||
monkeypatch.setattr(
|
||||
weather_module,
|
||||
"settings",
|
||||
types.SimpleNamespace(jenkins_api_user=" user ", jenkins_api_token=" token "),
|
||||
)
|
||||
assert weather_module._jenkins_auth() == ("user", "token")
|
||||
|
||||
assert weather_module._jenkins_status({"color": "blue_anime"}) == "running"
|
||||
assert weather_module._jenkins_status({"color": "green"}) == "success"
|
||||
assert weather_module._jenkins_status({"color": "yellow"}) == "failure"
|
||||
assert weather_module._jenkins_status({}) == "unknown"
|
||||
|
||||
assert weather_module._health_score({"healthReport": ["bad"]}, "success") == 100.0
|
||||
assert weather_module._health_score({}, "running") == 60.0
|
||||
assert weather_module._health_score({}, "failure") == 10.0
|
||||
assert weather_module._health_score({}, "unknown") == -1.0
|
||||
|
||||
assert weather_module._weather_icon(-1) == "❔"
|
||||
assert weather_module._weather_icon(60) == "⛅"
|
||||
assert weather_module._weather_icon(20) == "🌧️"
|
||||
|
||||
|
||||
def test_flatten_parse_and_fetch_edges(monkeypatch) -> None:
|
||||
flattened = weather_module._flatten_jobs(
|
||||
[
|
||||
"bad",
|
||||
{"name": ""},
|
||||
{"name": "folder", "jobs": [{"name": "child", "url": "https://ci/job/child/", "lastBuild": {"result": "SUCCESS"}}]},
|
||||
{"name": "folder-without-build", "jobs": []},
|
||||
]
|
||||
)
|
||||
assert [job["name"] for job in flattened] == ["folder/child", "folder-without-build"]
|
||||
assert weather_module._parse_job({"name": "missing-url"}) is None
|
||||
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings(base_url=""))
|
||||
assert weather_module._fetch_jobs() == []
|
||||
|
||||
captured = {}
|
||||
|
||||
class CapturingClient(_DummyClient):
|
||||
def __init__(self, **kwargs):
|
||||
captured.update(kwargs)
|
||||
super().__init__({"jobs": [{"name": "bad"}]})
|
||||
|
||||
monkeypatch.setattr(
|
||||
weather_module,
|
||||
"settings",
|
||||
types.SimpleNamespace(
|
||||
jenkins_base_url="https://ci.bstein.dev/",
|
||||
jenkins_api_user="user",
|
||||
jenkins_api_token="token",
|
||||
jenkins_api_timeout_sec=7.0,
|
||||
),
|
||||
)
|
||||
monkeypatch.setattr(weather_module.httpx, "Client", CapturingClient)
|
||||
assert weather_module._fetch_jobs() == []
|
||||
assert captured["auth"] == ("user", "token")
|
||||
assert captured["timeout"] == 7.0
|
||||
|
||||
class NonObjectClient(_DummyClient):
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(["bad"])
|
||||
|
||||
monkeypatch.setattr(weather_module.httpx, "Client", NonObjectClient)
|
||||
with pytest.raises(ValueError, match="non-object"):
|
||||
weather_module._fetch_jobs()
|
||||
|
||||
|
||||
def test_remove_missing_series_ignores_missing_metric_labels(monkeypatch) -> None:
|
||||
class MissingMetric:
|
||||
def remove(self, *labels):
|
||||
raise KeyError(labels)
|
||||
|
||||
weather_module._JOB_SERIES = {("old", "https://ci/job/old/", "☀️")}
|
||||
monkeypatch.setattr(weather_module, "_JOB_METRICS", (MissingMetric(),))
|
||||
|
||||
weather_module._remove_missing_series(set())
|
||||
|
||||
assert weather_module._JOB_SERIES == set()
|
||||
|
||||
|
||||
def test_collect_jenkins_build_weather_records_error(monkeypatch) -> None:
|
||||
monkeypatch.setattr(weather_module, "settings", _dummy_settings())
|
||||
before = _metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "error"}) or 0.0
|
||||
monkeypatch.setattr(weather_module, "_fetch_jobs", lambda: (_ for _ in ()).throw(RuntimeError("jenkins down")))
|
||||
|
||||
with pytest.raises(RuntimeError, match="jenkins down"):
|
||||
weather_module.collect_jenkins_build_weather()
|
||||
|
||||
assert (_metric_value("ariadne_jenkins_build_weather_runs_total", {"status": "error"}) or 0.0) == before + 1
|
||||
388
tests/test_jenkins_workspace_cleanup.py
Normal file
388
tests/test_jenkins_workspace_cleanup.py
Normal file
@ -0,0 +1,388 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import types
|
||||
|
||||
from prometheus_client import REGISTRY
|
||||
|
||||
from ariadne.services import jenkins_workspace_cleanup as cleanup_module
|
||||
|
||||
|
||||
def _metric_value(name: str, labels: dict[str, str]) -> float:
|
||||
value = REGISTRY.get_sample_value(name, labels)
|
||||
return float(value) if value is not None else 0.0
|
||||
|
||||
|
||||
def _dummy_settings(*, dry_run: bool, max_deletions: int = 20) -> types.SimpleNamespace:
|
||||
return types.SimpleNamespace(
|
||||
jenkins_workspace_namespace="jenkins",
|
||||
jenkins_workspace_pvc_prefix="pvc-workspace-",
|
||||
jenkins_workspace_cleanup_min_age_hours=1.0,
|
||||
jenkins_workspace_cleanup_dry_run=dry_run,
|
||||
jenkins_workspace_cleanup_max_deletions_per_run=max_deletions,
|
||||
)
|
||||
|
||||
|
||||
def _fake_payloads(now_iso: str, old_iso: str) -> dict[str, dict[str, object]]:
|
||||
return {
|
||||
"/api/v1/namespaces/jenkins/pods": {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {
|
||||
"annotations": {
|
||||
"jenkins.io/workspace-pvc": "pvc-workspace-annotated-active",
|
||||
}
|
||||
},
|
||||
"spec": {
|
||||
"volumes": [
|
||||
{"persistentVolumeClaim": {"claimName": "pvc-workspace-active"}},
|
||||
]
|
||||
},
|
||||
}
|
||||
]
|
||||
},
|
||||
"/api/v1/namespaces/jenkins/persistentvolumeclaims": {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {"name": "pvc-workspace-stale", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Lost"},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-workspace-active", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Bound"},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-workspace-annotated-active", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Lost"},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-workspace-fresh", "creationTimestamp": now_iso},
|
||||
"status": {"phase": "Lost"},
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-workspace-deleting",
|
||||
"creationTimestamp": old_iso,
|
||||
"deletionTimestamp": old_iso,
|
||||
},
|
||||
"status": {"phase": "Lost"},
|
||||
},
|
||||
]
|
||||
},
|
||||
"/api/v1/persistentvolumes": {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {"name": "pvc-old", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Released"},
|
||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-stale"}},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-active", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Released"},
|
||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-active"}},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-annotated", "creationTimestamp": old_iso},
|
||||
"status": {"phase": "Released"},
|
||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-annotated-active"}},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "pvc-fresh", "creationTimestamp": now_iso},
|
||||
"status": {"phase": "Released"},
|
||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-fresh"}},
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-deleting",
|
||||
"creationTimestamp": old_iso,
|
||||
"deletionTimestamp": old_iso,
|
||||
},
|
||||
"status": {"phase": "Released"},
|
||||
"spec": {"claimRef": {"namespace": "jenkins", "name": "pvc-workspace-deleting"}},
|
||||
},
|
||||
]
|
||||
},
|
||||
"/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes": {
|
||||
"items": [
|
||||
{"metadata": {"name": "pvc-old", "creationTimestamp": old_iso}},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-orphan",
|
||||
"creationTimestamp": old_iso,
|
||||
"labels": {
|
||||
"kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
|
||||
"kubernetes.io/created-for/pvc/namespace": "jenkins",
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-attached",
|
||||
"creationTimestamp": old_iso,
|
||||
"labels": {
|
||||
"kubernetes.io/created-for/pvc/name": "pvc-workspace-annotated-active",
|
||||
"kubernetes.io/created-for/pvc/namespace": "jenkins",
|
||||
},
|
||||
},
|
||||
"status": {"state": "attached", "isAttached": True, "robustness": "healthy"},
|
||||
"spec": {"frontend": "blockdev"},
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-orphan-other-namespace",
|
||||
"creationTimestamp": old_iso,
|
||||
"labels": {
|
||||
"kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
|
||||
"kubernetes.io/created-for/pvc/namespace": "nextcloud",
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-orphan-fresh",
|
||||
"creationTimestamp": now_iso,
|
||||
"labels": {
|
||||
"kubernetes.io/created-for/pvc/name": "pvc-workspace-fresh",
|
||||
"kubernetes.io/created-for/pvc/namespace": "jenkins",
|
||||
},
|
||||
}
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-vol-deleting",
|
||||
"creationTimestamp": old_iso,
|
||||
"deletionTimestamp": old_iso,
|
||||
"labels": {
|
||||
"kubernetes.io/created-for/pvc/name": "pvc-workspace-orphan",
|
||||
"kubernetes.io/created-for/pvc/namespace": "jenkins",
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def test_cleanup_jenkins_workspace_storage_dry_run(monkeypatch) -> None:
|
||||
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=True))
|
||||
|
||||
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
old_iso = "2020-01-01T00:00:00Z"
|
||||
payloads = _fake_payloads(now_iso, old_iso)
|
||||
deleted_paths: list[str] = []
|
||||
|
||||
def fake_get_json(path: str):
|
||||
if path in payloads:
|
||||
return payloads[path]
|
||||
raise AssertionError(f"unexpected path: {path}")
|
||||
|
||||
def fake_delete_json(path: str):
|
||||
deleted_paths.append(path)
|
||||
return {"status": "Success"}
|
||||
|
||||
before_runs = _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_runs_total",
|
||||
{"status": "ok", "mode": "dry_run"},
|
||||
)
|
||||
before_planned = _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "pvc", "action": "planned", "mode": "dry_run"},
|
||||
)
|
||||
|
||||
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||
|
||||
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||
|
||||
assert summary.dry_run is True
|
||||
assert summary.pvcs_planned == 1
|
||||
assert summary.pvs_planned == 1
|
||||
assert summary.volumes_planned == 2
|
||||
assert summary.pvcs_deleted == 0
|
||||
assert summary.pvs_deleted == 0
|
||||
assert summary.volumes_deleted == 0
|
||||
assert summary.failures == 0
|
||||
assert deleted_paths == []
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_runs_total",
|
||||
{"status": "ok", "mode": "dry_run"},
|
||||
) == before_runs + 1
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "pvc", "action": "planned", "mode": "dry_run"},
|
||||
) == before_planned + 1
|
||||
|
||||
|
||||
def test_cleanup_jenkins_workspace_storage(monkeypatch) -> None:
|
||||
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False))
|
||||
|
||||
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
old_iso = "2020-01-01T00:00:00Z"
|
||||
deleted_paths: list[str] = []
|
||||
payloads = _fake_payloads(now_iso, old_iso)
|
||||
|
||||
def fake_get_json(path: str):
|
||||
if path in payloads:
|
||||
return payloads[path]
|
||||
raise AssertionError(f"unexpected path: {path}")
|
||||
|
||||
def fake_delete_json(path: str):
|
||||
deleted_paths.append(path)
|
||||
return {"status": "Success"}
|
||||
|
||||
before_runs = _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_runs_total",
|
||||
{"status": "ok", "mode": "delete"},
|
||||
)
|
||||
before_deleted = _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "longhorn_volume", "action": "deleted", "mode": "delete"},
|
||||
)
|
||||
|
||||
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||
|
||||
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||
|
||||
assert summary.pvcs_deleted == 1
|
||||
assert summary.pvs_deleted == 1
|
||||
assert summary.volumes_deleted == 2
|
||||
assert summary.failures == 0
|
||||
assert "/api/v1/namespaces/jenkins/persistentvolumeclaims/pvc-workspace-stale" in deleted_paths
|
||||
assert "/api/v1/persistentvolumes/pvc-old" in deleted_paths
|
||||
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-old" in deleted_paths
|
||||
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan" in deleted_paths
|
||||
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan-other-namespace" not in deleted_paths
|
||||
assert "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-attached" not in deleted_paths
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_runs_total",
|
||||
{"status": "ok", "mode": "delete"},
|
||||
) == before_runs + 1
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "longhorn_volume", "action": "deleted", "mode": "delete"},
|
||||
) == before_deleted + 2
|
||||
|
||||
|
||||
def test_cleanup_jenkins_workspace_storage_failure(monkeypatch) -> None:
|
||||
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False))
|
||||
|
||||
def fake_get_json(path: str):
|
||||
if path == "/api/v1/namespaces/jenkins/pods":
|
||||
return {"items": []}
|
||||
if path == "/api/v1/namespaces/jenkins/persistentvolumeclaims":
|
||||
return {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {"name": "pvc-workspace-stale", "creationTimestamp": "2020-01-01T00:00:00Z"},
|
||||
"status": {"phase": "Lost"},
|
||||
}
|
||||
]
|
||||
}
|
||||
if path == "/api/v1/persistentvolumes":
|
||||
return {"items": []}
|
||||
if path == "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes":
|
||||
return {"items": []}
|
||||
raise AssertionError(f"unexpected path: {path}")
|
||||
|
||||
def fake_delete_json(_path: str):
|
||||
raise RuntimeError("boom")
|
||||
|
||||
before_failures = _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "cleanup", "action": "failed", "mode": "delete"},
|
||||
)
|
||||
|
||||
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||
|
||||
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||
assert summary.failures == 1
|
||||
assert summary.pvcs_deleted == 0
|
||||
assert _metric_value(
|
||||
"ariadne_jenkins_workspace_cleanup_objects_total",
|
||||
{"kind": "cleanup", "action": "failed", "mode": "delete"},
|
||||
) == before_failures + 1
|
||||
|
||||
|
||||
def test_cleanup_jenkins_workspace_storage_uses_longhorn_kubernetes_status(monkeypatch) -> None:
|
||||
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False))
|
||||
deleted_paths: list[str] = []
|
||||
|
||||
def fake_get_json(path: str):
|
||||
if path == "/api/v1/namespaces/jenkins/pods":
|
||||
return {"items": []}
|
||||
if path == "/api/v1/namespaces/jenkins/persistentvolumeclaims":
|
||||
return {"items": []}
|
||||
if path == "/api/v1/persistentvolumes":
|
||||
return {"items": []}
|
||||
if path == "/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes":
|
||||
return {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {
|
||||
"name": "pvc-orphan-kstatus",
|
||||
"creationTimestamp": "2020-01-01T00:00:00Z",
|
||||
},
|
||||
"status": {
|
||||
"state": "detached",
|
||||
"isAttached": False,
|
||||
"robustness": "unknown",
|
||||
"kubernetesStatus": {
|
||||
"namespace": "jenkins",
|
||||
"pvcName": "pvc-workspace-kstatus",
|
||||
"pvName": "pvc-orphan-kstatus",
|
||||
},
|
||||
},
|
||||
"spec": {"frontend": "blockdev"},
|
||||
}
|
||||
]
|
||||
}
|
||||
raise AssertionError(f"unexpected path: {path}")
|
||||
|
||||
def fake_delete_json(path: str):
|
||||
deleted_paths.append(path)
|
||||
return {"status": "Success"}
|
||||
|
||||
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||
|
||||
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||
|
||||
assert summary.volumes_planned == 1
|
||||
assert summary.volumes_deleted == 1
|
||||
assert summary.failures == 0
|
||||
assert deleted_paths == ["/apis/longhorn.io/v1beta2/namespaces/longhorn-system/volumes/pvc-orphan-kstatus"]
|
||||
|
||||
|
||||
def test_cleanup_jenkins_workspace_storage_guard_caps_mass_delete(monkeypatch) -> None:
|
||||
monkeypatch.setattr(cleanup_module, "settings", _dummy_settings(dry_run=False, max_deletions=1))
|
||||
|
||||
now_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
||||
old_iso = "2020-01-01T00:00:00Z"
|
||||
payloads = _fake_payloads(now_iso, old_iso)
|
||||
deleted_paths: list[str] = []
|
||||
|
||||
def fake_get_json(path: str):
|
||||
if path in payloads:
|
||||
return payloads[path]
|
||||
raise AssertionError(f"unexpected path: {path}")
|
||||
|
||||
def fake_delete_json(path: str):
|
||||
deleted_paths.append(path)
|
||||
return {"status": "Success"}
|
||||
|
||||
monkeypatch.setattr(cleanup_module, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(cleanup_module, "delete_json", fake_delete_json)
|
||||
|
||||
summary = cleanup_module.cleanup_jenkins_workspace_storage()
|
||||
|
||||
assert summary.failures == 0
|
||||
assert summary.pvcs_planned == 1
|
||||
assert summary.pvs_planned == 1
|
||||
assert summary.volumes_planned == 1
|
||||
assert summary.pvcs_deleted == 1
|
||||
assert summary.pvs_deleted == 0
|
||||
assert summary.volumes_deleted == 0
|
||||
assert summary.skipped == 2
|
||||
assert deleted_paths == ["/api/v1/namespaces/jenkins/persistentvolumeclaims/pvc-workspace-stale"]
|
||||
@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import builtins
|
||||
import importlib.util
|
||||
import sys
|
||||
import types
|
||||
|
||||
import pytest
|
||||
@ -57,11 +60,25 @@ class HangingStream(DummyStream):
|
||||
return False
|
||||
|
||||
|
||||
class ReturnCodeStream(DummyStream):
|
||||
def __init__(self):
|
||||
super().__init__(stdout="fallback", stderr="", exit_code=0)
|
||||
self.returncode = 7
|
||||
|
||||
def is_open(self) -> bool:
|
||||
return False
|
||||
|
||||
def peek_exit_code(self):
|
||||
raise AssertionError("closed streams should not read exit code")
|
||||
|
||||
|
||||
def test_build_command_wraps_env() -> None:
|
||||
cmd = _build_command(["echo", "hello"], {"FOO": "bar"})
|
||||
assert cmd[0] == "/bin/sh"
|
||||
assert "export FOO=bar" in cmd[2]
|
||||
|
||||
assert _build_command("echo hello", None) == ["/bin/sh", "-c", "echo hello"]
|
||||
|
||||
|
||||
def test_exec_returns_output(monkeypatch) -> None:
|
||||
monkeypatch.setattr(exec_module, "select_pod", lambda *_args, **_kwargs: PodRef("pod", "ns"))
|
||||
@ -94,6 +111,17 @@ def test_exec_times_out(monkeypatch) -> None:
|
||||
executor.exec(["sleep", "10"], timeout_sec=0.0, check=False)
|
||||
|
||||
|
||||
def test_exec_uses_returncode_when_stream_has_no_exit_code(monkeypatch) -> None:
|
||||
monkeypatch.setattr(exec_module, "select_pod", lambda *_args, **_kwargs: PodRef("pod", "ns"))
|
||||
monkeypatch.setattr(exec_module, "_ensure_client", lambda: types.SimpleNamespace(connect_get_namespaced_pod_exec=None))
|
||||
monkeypatch.setattr(exec_module, "stream", lambda *args, **kwargs: ReturnCodeStream())
|
||||
|
||||
result = PodExecutor("ns", "app=test", None).exec("echo ok", check=False)
|
||||
|
||||
assert result.exit_code == 7
|
||||
assert result.ok is False
|
||||
|
||||
|
||||
def test_ensure_client_fallback(monkeypatch) -> None:
|
||||
dummy_api = object()
|
||||
monkeypatch.setattr(exec_module, "_CORE_API", None)
|
||||
@ -115,3 +143,39 @@ def test_ensure_client_fallback(monkeypatch) -> None:
|
||||
monkeypatch.setattr(exec_module, "client", types.SimpleNamespace(CoreV1Api=lambda: dummy_api))
|
||||
|
||||
assert exec_module._ensure_client() is dummy_api
|
||||
|
||||
|
||||
def test_ensure_client_cached_and_import_error(monkeypatch) -> None:
|
||||
cached = object()
|
||||
monkeypatch.setattr(exec_module, "_IMPORT_ERROR", None)
|
||||
monkeypatch.setattr(exec_module, "_CORE_API", cached)
|
||||
assert exec_module._ensure_client() is cached
|
||||
|
||||
error = RuntimeError("missing kubernetes")
|
||||
monkeypatch.setattr(exec_module, "_IMPORT_ERROR", error)
|
||||
monkeypatch.setattr(exec_module, "_CORE_API", None)
|
||||
with pytest.raises(RuntimeError, match="kubernetes client missing"):
|
||||
exec_module._ensure_client()
|
||||
|
||||
|
||||
def test_exec_module_import_error_fallback(monkeypatch) -> None:
|
||||
real_import = builtins.__import__
|
||||
|
||||
def fake_import(name, globals=None, locals=None, fromlist=(), level=0):
|
||||
if name == "kubernetes" or name.startswith("kubernetes."):
|
||||
raise RuntimeError("kubernetes unavailable")
|
||||
return real_import(name, globals, locals, fromlist, level)
|
||||
|
||||
module_name = "ariadne.k8s.exec_import_failure_probe"
|
||||
spec = importlib.util.spec_from_file_location(module_name, exec_module.__file__)
|
||||
assert spec and spec.loader
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
monkeypatch.setattr(builtins, "__import__", fake_import)
|
||||
monkeypatch.setitem(sys.modules, module_name, module)
|
||||
|
||||
spec.loader.exec_module(module)
|
||||
|
||||
assert module.client is None
|
||||
assert module.config is None
|
||||
assert module.stream is None
|
||||
assert isinstance(module._IMPORT_ERROR, RuntimeError)
|
||||
|
||||
@ -17,6 +17,23 @@ def test_list_pods_encodes_selector(monkeypatch) -> None:
|
||||
assert "labelSelector=app%3Dnextcloud" in captured["path"]
|
||||
|
||||
|
||||
def test_list_pods_rejects_missing_namespace() -> None:
|
||||
with pytest.raises(pods_module.PodSelectionError, match="namespace missing"):
|
||||
pods_module.list_pods(" ", "app=nextcloud")
|
||||
|
||||
|
||||
def test_parse_start_time_handles_empty_invalid_and_naive_values() -> None:
|
||||
assert pods_module._parse_start_time(None) == 0.0
|
||||
assert pods_module._parse_start_time("not-a-date") == 0.0
|
||||
assert pods_module._parse_start_time("2026-01-20T00:00:00") > 0
|
||||
|
||||
|
||||
def test_ready_helper_handles_malformed_conditions() -> None:
|
||||
assert pods_module._is_ready({"status": {"phase": "Running"}}) is False
|
||||
assert pods_module._is_ready({"status": {"phase": "Running", "conditions": [None]}}) is False
|
||||
assert pods_module._is_ready({"status": {"phase": "Running", "conditions": [{"type": "ContainersReady"}]}}) is False
|
||||
|
||||
|
||||
def test_select_pod_picks_ready_latest(monkeypatch) -> None:
|
||||
payload = {
|
||||
"items": [
|
||||
@ -57,3 +74,28 @@ def test_select_pod_ignores_non_ready(monkeypatch) -> None:
|
||||
|
||||
with pytest.raises(pods_module.PodSelectionError):
|
||||
pods_module.select_pod("demo", "app=test")
|
||||
|
||||
|
||||
def test_select_pod_skips_deleting_and_blank_names(monkeypatch) -> None:
|
||||
payload = {
|
||||
"items": [
|
||||
{
|
||||
"metadata": {"name": "deleting", "deletionTimestamp": "2026-01-20T00:00:00Z"},
|
||||
"status": {"phase": "Running", "conditions": [{"type": "Ready", "status": "True"}]},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": " "},
|
||||
"status": {"phase": "Running", "conditions": [{"type": "Ready", "status": "True"}]},
|
||||
},
|
||||
{
|
||||
"metadata": {"name": "ready"},
|
||||
"status": {"phase": "Running", "nodeName": "titan-1", "conditions": [{"type": "Ready", "status": "True"}]},
|
||||
},
|
||||
]
|
||||
}
|
||||
monkeypatch.setattr(pods_module, "get_json", lambda *_args, **_kwargs: payload)
|
||||
|
||||
pod = pods_module.select_pod("demo", "app=test")
|
||||
|
||||
assert pod.name == "ready"
|
||||
assert pod.node == "titan-1"
|
||||
|
||||
@ -1,5 +1,8 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from ariadne.services import mailu_events as mailu_events_module
|
||||
from ariadne.services.mailu_events import MailuEventRunner
|
||||
|
||||
|
||||
@ -52,3 +55,95 @@ def test_mailu_event_debounce() -> None:
|
||||
status, payload = events.handle_event({"force": True})
|
||||
assert status == 202
|
||||
assert payload["status"] == "accepted"
|
||||
|
||||
|
||||
def test_mailu_event_parses_string_flags_and_context() -> None:
|
||||
calls = []
|
||||
|
||||
def runner(reason: str, force: bool):
|
||||
calls.append((reason, force))
|
||||
return "ok", ""
|
||||
|
||||
events = MailuEventRunner(
|
||||
min_interval_sec=0.0,
|
||||
wait_timeout_sec=0.1,
|
||||
runner=runner,
|
||||
thread_factory=_instant_thread_factory,
|
||||
)
|
||||
|
||||
status, payload = events.handle_event({"wait": "yes", "force": "on", "eventType": " UPDATE_PROFILE ", "userId": " u1 "})
|
||||
|
||||
assert status == 200
|
||||
assert payload["status"] == "ok"
|
||||
assert calls == [("keycloak_event:UPDATE_PROFILE", True)]
|
||||
|
||||
|
||||
def test_mailu_event_defaults_for_missing_payload() -> None:
|
||||
calls = []
|
||||
|
||||
def runner(reason: str, force: bool):
|
||||
calls.append((reason, force))
|
||||
return "ok", ""
|
||||
|
||||
events = MailuEventRunner(
|
||||
min_interval_sec=0.0,
|
||||
wait_timeout_sec=0.1,
|
||||
runner=runner,
|
||||
thread_factory=_instant_thread_factory,
|
||||
)
|
||||
|
||||
status, payload = events.handle_event(None)
|
||||
|
||||
assert status == 202
|
||||
assert payload == {"status": "accepted", "triggered": True}
|
||||
assert calls == [("keycloak_event", False)]
|
||||
|
||||
|
||||
def test_mailu_event_running_skip_and_wait_timeout() -> None:
|
||||
def parked_thread_factory(target=None, args=(), daemon=None):
|
||||
class ParkedThread:
|
||||
def start(self) -> None:
|
||||
return None
|
||||
|
||||
return ParkedThread()
|
||||
|
||||
events = MailuEventRunner(
|
||||
min_interval_sec=0.0,
|
||||
wait_timeout_sec=0.0,
|
||||
runner=lambda _reason, _force: ("ok", ""),
|
||||
thread_factory=parked_thread_factory,
|
||||
)
|
||||
|
||||
status, payload = events.handle_event({"wait": True})
|
||||
assert status == 200
|
||||
assert payload == {"status": "running"}
|
||||
|
||||
status, payload = events.handle_event({})
|
||||
assert status == 202
|
||||
assert payload == {"status": "skipped", "triggered": False}
|
||||
|
||||
|
||||
def test_mailu_event_runner_reports_exceptions() -> None:
|
||||
def failing_runner(_reason: str, _force: bool):
|
||||
raise RuntimeError("mailu exploded")
|
||||
|
||||
events = MailuEventRunner(
|
||||
min_interval_sec=0.0,
|
||||
wait_timeout_sec=0.1,
|
||||
runner=failing_runner,
|
||||
thread_factory=_instant_thread_factory,
|
||||
)
|
||||
|
||||
status, payload = events.handle_event({"wait": True})
|
||||
|
||||
assert status == 500
|
||||
assert payload == {"status": "error", "detail": "mailu exploded"}
|
||||
|
||||
|
||||
def test_default_runner_maps_mailu_summary(monkeypatch) -> None:
|
||||
events = MailuEventRunner(min_interval_sec=0.0, wait_timeout_sec=0.1)
|
||||
monkeypatch.setattr(mailu_events_module.mailu, "sync", lambda reason, force=False: SimpleNamespace(failures=0, detail="synced"))
|
||||
assert events._default_runner("test", True) == ("ok", "synced")
|
||||
|
||||
monkeypatch.setattr(mailu_events_module.mailu, "sync", lambda reason, force=False: SimpleNamespace(failures=1, detail="failed"))
|
||||
assert events._default_runner("test", False) == ("error", "failed")
|
||||
|
||||
@ -111,3 +111,90 @@ def test_watch_sentinel_handles_http_error(monkeypatch) -> None:
|
||||
assert summary.status == "error"
|
||||
assert summary.detail == "upstream fail"
|
||||
assert summary.result["detail"] == "upstream fail"
|
||||
|
||||
|
||||
def test_normalize_payload_and_ready(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="http://metis", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
service = metis_module.MetisService()
|
||||
|
||||
assert service.ready() is True
|
||||
assert metis_module._normalize_payload(None) == {}
|
||||
assert metis_module._normalize_payload(["watched"]) == {"result": ["watched"]}
|
||||
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
assert service.ready() is False
|
||||
|
||||
|
||||
def test_watch_sentinel_handles_non_json_success(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="http://metis", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
client = DummyClient(DummyResponse(payload=ValueError("not json")))
|
||||
monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: client)
|
||||
|
||||
summary = metis_module.MetisService().watch_sentinel()
|
||||
|
||||
assert summary.status == "ok"
|
||||
assert summary.result == {}
|
||||
|
||||
|
||||
def test_watch_sentinel_handles_http_error_without_json(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="http://metis", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
client = DummyClient(DummyResponse(status_code=503, payload=ValueError("not json")))
|
||||
monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: client)
|
||||
|
||||
summary = metis_module.MetisService().watch_sentinel()
|
||||
|
||||
assert summary.status == "error"
|
||||
assert summary.detail == "metis watch failed with HTTP 503"
|
||||
|
||||
|
||||
def test_watch_sentinel_handles_client_exception(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="http://metis", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
|
||||
class FailingClient:
|
||||
def __enter__(self):
|
||||
raise RuntimeError("network down")
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: FailingClient())
|
||||
|
||||
summary = metis_module.MetisService().watch_sentinel()
|
||||
|
||||
assert summary.status == "error"
|
||||
assert summary.detail == "network down"
|
||||
|
||||
|
||||
def test_watch_sentinel_normalizes_message_and_unknown_status(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
"ariadne.services.metis.settings",
|
||||
SimpleNamespace(metis_base_url="http://metis", metis_watch_url="", metis_timeout_sec=10.0),
|
||||
)
|
||||
client = DummyClient(DummyResponse(payload={"status": "warning", "message": " watched with warnings "}))
|
||||
monkeypatch.setattr(metis_module.httpx, "Client", lambda **kwargs: client)
|
||||
|
||||
summary = metis_module.MetisService().watch_sentinel()
|
||||
|
||||
assert summary.status == "ok"
|
||||
assert summary.detail == "watched with warnings"
|
||||
|
||||
client.response = DummyResponse(payload={"status": "paused"})
|
||||
summary = metis_module.MetisService().watch_sentinel()
|
||||
|
||||
assert summary.status == "ok"
|
||||
assert summary.detail == "metis watch returned paused"
|
||||
|
||||
10
tests/test_name_generator.py
Normal file
10
tests/test_name_generator.py
Normal file
@ -0,0 +1,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from ariadne.utils.name_generator import NameGenerator
|
||||
|
||||
|
||||
def test_name_generator_returns_none_after_exhausting_attempts(monkeypatch) -> None:
|
||||
generator = NameGenerator(max_attempts=2)
|
||||
monkeypatch.setattr(NameGenerator, "generate", lambda self: "already-used")
|
||||
|
||||
assert generator.unique({"already-used"}) is None
|
||||
@ -5,10 +5,23 @@ import types
|
||||
import ariadne.services.opensearch_prune as prune_module
|
||||
|
||||
|
||||
def _settings(**overrides):
|
||||
values = {
|
||||
"opensearch_url": "http://opensearch",
|
||||
"opensearch_limit_bytes": 5,
|
||||
"opensearch_index_patterns": "kube-*",
|
||||
"opensearch_timeout_sec": 5.0,
|
||||
}
|
||||
values.update(overrides)
|
||||
return types.SimpleNamespace(**values)
|
||||
|
||||
|
||||
def test_parse_size() -> None:
|
||||
assert prune_module.parse_size("") == 0
|
||||
assert prune_module.parse_size("1gb") == 1024**3
|
||||
assert prune_module.parse_size("0") == 0
|
||||
assert prune_module.parse_size("bad") == 0
|
||||
assert prune_module.parse_size("1zb") == 0
|
||||
|
||||
|
||||
def test_prune_indices_deletes(monkeypatch) -> None:
|
||||
@ -58,3 +71,118 @@ def test_prune_indices_deletes(monkeypatch) -> None:
|
||||
|
||||
summary = prune_module.prune_indices()
|
||||
assert summary.deleted == 1
|
||||
|
||||
|
||||
def test_fetch_indices_ignores_missing_pattern(monkeypatch) -> None:
|
||||
monkeypatch.setattr(prune_module, "settings", _settings())
|
||||
|
||||
class DummyResponse:
|
||||
status_code = prune_module.HTTP_NOT_FOUND
|
||||
|
||||
def raise_for_status(self):
|
||||
raise AssertionError("404 should be handled before raise_for_status")
|
||||
|
||||
client = types.SimpleNamespace(get=lambda *_args, **_kwargs: DummyResponse())
|
||||
|
||||
assert prune_module._fetch_indices(client, "missing-*") == []
|
||||
|
||||
|
||||
def test_prune_indices_returns_when_no_patterns(monkeypatch) -> None:
|
||||
monkeypatch.setattr(prune_module, "settings", _settings(opensearch_index_patterns=" , "))
|
||||
|
||||
summary = prune_module.prune_indices()
|
||||
|
||||
assert summary.detail == "no patterns configured"
|
||||
assert summary.deleted == 0
|
||||
|
||||
|
||||
def test_prune_indices_continues_after_fetch_failure(monkeypatch) -> None:
|
||||
monkeypatch.setattr(
|
||||
prune_module,
|
||||
"settings",
|
||||
_settings(opensearch_index_patterns="bad-*,kube-*", opensearch_limit_bytes=100),
|
||||
)
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
def json(self):
|
||||
return [
|
||||
{"index": ".system", "store.size": "100b", "creation.date": "1"},
|
||||
{"index": "kube-1", "store.size": "1b", "creation.date": "2"},
|
||||
]
|
||||
|
||||
class DummyClient:
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, url, params=None):
|
||||
if "bad-*" in url:
|
||||
raise RuntimeError("fetch failed")
|
||||
return DummyResponse()
|
||||
|
||||
def delete(self, _url):
|
||||
raise AssertionError("within-limit result should not delete indices")
|
||||
|
||||
monkeypatch.setattr(prune_module.httpx, "Client", lambda *args, **kwargs: DummyClient())
|
||||
|
||||
summary = prune_module.prune_indices()
|
||||
|
||||
assert summary.detail == "within limit"
|
||||
assert summary.total_before == 1
|
||||
|
||||
|
||||
def test_prune_indices_logs_delete_failures_and_keeps_pruning(monkeypatch) -> None:
|
||||
monkeypatch.setattr(prune_module, "settings", _settings(opensearch_limit_bytes=5))
|
||||
|
||||
class DummyResponse:
|
||||
status_code = 200
|
||||
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def raise_for_status(self):
|
||||
return None
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
class DummyClient:
|
||||
def __init__(self):
|
||||
self.deleted: list[str] = []
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, _url, params=None):
|
||||
return DummyResponse(
|
||||
[
|
||||
{"index": "kube-old", "store.size": "10b", "creation.date": "1"},
|
||||
{"index": "kube-new", "store.size": "10b", "creation.date": "2"},
|
||||
]
|
||||
)
|
||||
|
||||
def delete(self, url):
|
||||
self.deleted.append(url)
|
||||
if url.endswith("/kube-old"):
|
||||
raise RuntimeError("delete failed")
|
||||
return DummyResponse({})
|
||||
|
||||
dummy = DummyClient()
|
||||
monkeypatch.setattr(prune_module.httpx, "Client", lambda *args, **kwargs: dummy)
|
||||
|
||||
summary = prune_module.prune_indices()
|
||||
|
||||
assert summary.deleted == 1
|
||||
assert summary.total_before == 20
|
||||
assert summary.total_after == 10
|
||||
assert dummy.deleted == ["http://opensearch/kube-old", "http://opensearch/kube-new"]
|
||||
|
||||
@ -35,3 +35,15 @@ def test_clean_finished_pods_handles_failure(monkeypatch) -> None:
|
||||
|
||||
summary = pod_cleaner.clean_finished_pods()
|
||||
assert summary.failures == 2
|
||||
|
||||
|
||||
def test_clean_finished_pods_skips_missing_identifiers(monkeypatch) -> None:
|
||||
def fake_get_json(_path: str):
|
||||
return {"items": [{"metadata": {"namespace": "ns"}}, {"metadata": {"name": "pod"}}, {"metadata": "bad"}]}
|
||||
|
||||
monkeypatch.setattr(pod_cleaner, "get_json", fake_get_json)
|
||||
monkeypatch.setattr(pod_cleaner, "delete_json", lambda _path: None)
|
||||
|
||||
summary = pod_cleaner.clean_finished_pods()
|
||||
assert summary.skipped == 6
|
||||
assert summary.deleted == 0
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -4,6 +4,7 @@ from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
import time
|
||||
|
||||
from ariadne.db.storage import ScheduleState
|
||||
from ariadne.scheduler.cron import CronScheduler, CronTask
|
||||
|
||||
|
||||
@ -22,6 +23,9 @@ class DummyStorage:
|
||||
def record_event(self, *args, **kwargs):
|
||||
self.events.append((args, kwargs))
|
||||
|
||||
def list_schedule_states(self):
|
||||
return []
|
||||
|
||||
|
||||
def test_execute_task_records_failure() -> None:
|
||||
storage = DummyStorage()
|
||||
@ -87,6 +91,115 @@ def test_scheduler_start_skips_when_running() -> None:
|
||||
assert scheduler._thread.started is False
|
||||
|
||||
|
||||
def test_scheduler_start_hydrates_persisted_schedule_metrics(monkeypatch) -> None:
|
||||
class HydratingStorage(DummyStorage):
|
||||
def list_schedule_states(self):
|
||||
finished = datetime(2026, 1, 1, 12, 0, tzinfo=timezone.utc)
|
||||
return [
|
||||
ScheduleState(
|
||||
task_name="nightly",
|
||||
cron_expr="30 4 * * *",
|
||||
last_started_at=finished,
|
||||
last_finished_at=finished,
|
||||
last_status="ok",
|
||||
last_error=None,
|
||||
last_duration_ms=100,
|
||||
next_run_at=None,
|
||||
),
|
||||
ScheduleState(
|
||||
task_name="unknown",
|
||||
cron_expr="* * * * *",
|
||||
last_started_at=finished,
|
||||
last_finished_at=finished,
|
||||
last_status="ok",
|
||||
last_error=None,
|
||||
last_duration_ms=100,
|
||||
next_run_at=None,
|
||||
),
|
||||
]
|
||||
|
||||
recorded = []
|
||||
monkeypatch.setattr("ariadne.scheduler.cron.record_schedule_state", lambda *args: recorded.append(args))
|
||||
|
||||
scheduler = CronScheduler(HydratingStorage(), tick_sec=0.01)
|
||||
scheduler.add_task("nightly", "30 4 * * *", lambda: None)
|
||||
scheduler.start()
|
||||
scheduler.stop()
|
||||
|
||||
assert any(item[0] == "nightly" and item[4] is True for item in recorded)
|
||||
assert not any(item[0] == "unknown" for item in recorded)
|
||||
|
||||
|
||||
def test_scheduler_hydration_ignores_storage_without_state_listing() -> None:
|
||||
class MinimalStorage:
|
||||
pass
|
||||
|
||||
scheduler = CronScheduler(MinimalStorage(), tick_sec=0.01)
|
||||
|
||||
scheduler._hydrate_schedule_metrics()
|
||||
|
||||
|
||||
def test_scheduler_hydration_logs_storage_errors(monkeypatch) -> None:
|
||||
class BrokenStorage(DummyStorage):
|
||||
def list_schedule_states(self):
|
||||
raise RuntimeError("storage offline")
|
||||
|
||||
warnings = []
|
||||
scheduler = CronScheduler(BrokenStorage(), tick_sec=0.01)
|
||||
monkeypatch.setattr(scheduler._logger, "warning", lambda *args, **kwargs: warnings.append((args, kwargs)))
|
||||
|
||||
scheduler._hydrate_schedule_metrics()
|
||||
|
||||
assert warnings
|
||||
assert warnings[0][1]["extra"]["detail"] == "storage offline"
|
||||
|
||||
|
||||
def test_scheduler_hydration_records_error_and_unknown_statuses(monkeypatch) -> None:
|
||||
finished = datetime(2026, 1, 1, 12, 0, tzinfo=timezone.utc)
|
||||
|
||||
class StatusStorage(DummyStorage):
|
||||
def list_schedule_states(self):
|
||||
return [
|
||||
ScheduleState(
|
||||
task_name="failed-task",
|
||||
cron_expr="*/5 * * * *",
|
||||
last_started_at=finished,
|
||||
last_finished_at=None,
|
||||
last_status="error",
|
||||
last_error="boom",
|
||||
last_duration_ms=100,
|
||||
next_run_at=None,
|
||||
),
|
||||
ScheduleState(
|
||||
task_name="pending-task",
|
||||
cron_expr="*/10 * * * *",
|
||||
last_started_at=finished,
|
||||
last_finished_at=None,
|
||||
last_status="running",
|
||||
last_error=None,
|
||||
last_duration_ms=100,
|
||||
next_run_at=None,
|
||||
),
|
||||
]
|
||||
|
||||
recorded = []
|
||||
monkeypatch.setattr("ariadne.scheduler.cron.record_schedule_state", lambda *args: recorded.append(args))
|
||||
|
||||
scheduler = CronScheduler(StatusStorage(), tick_sec=0.01)
|
||||
scheduler.add_task("failed-task", "*/5 * * * *", lambda: None)
|
||||
scheduler.add_task("pending-task", "*/10 * * * *", lambda: None)
|
||||
scheduler._next_run.pop("pending-task")
|
||||
|
||||
scheduler._hydrate_schedule_metrics()
|
||||
|
||||
failed = next(item for item in recorded if item[0] == "failed-task")
|
||||
pending = next(item for item in recorded if item[0] == "pending-task")
|
||||
assert failed[2] is None
|
||||
assert failed[4] is False
|
||||
assert pending[3] is None
|
||||
assert pending[4] is None
|
||||
|
||||
|
||||
def test_compute_next_handles_naive_timestamp() -> None:
|
||||
scheduler = CronScheduler(DummyStorage(), tick_sec=0.1)
|
||||
base = datetime(2024, 1, 1, 12, 0, 0)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -25,3 +25,18 @@ def test_from_env_includes_metis_settings(monkeypatch) -> None:
|
||||
assert cfg.metis_watch_url == "http://metis.example/internal/sentinel/watch"
|
||||
assert cfg.metis_timeout_sec == 9.5
|
||||
assert cfg.metis_sentinel_watch_cron == "*/7 * * * *"
|
||||
|
||||
|
||||
def test_from_env_includes_jenkins_weather_settings(monkeypatch) -> None:
|
||||
monkeypatch.setenv("JENKINS_BASE_URL", "https://ci.bstein.dev/")
|
||||
monkeypatch.setenv("JENKINS_API_USER", "ariadne")
|
||||
monkeypatch.setenv("JENKINS_API_TOKEN", "token")
|
||||
monkeypatch.setenv("JENKINS_API_TIMEOUT_SEC", "8.5")
|
||||
monkeypatch.setenv("ARIADNE_SCHEDULE_JENKINS_BUILD_WEATHER", "*/9 * * * *")
|
||||
|
||||
cfg = Settings.from_env()
|
||||
assert cfg.jenkins_base_url == "https://ci.bstein.dev"
|
||||
assert cfg.jenkins_api_user == "ariadne"
|
||||
assert cfg.jenkins_api_token == "token"
|
||||
assert cfg.jenkins_api_timeout_sec == 8.5
|
||||
assert cfg.jenkins_build_weather_cron == "*/9 * * * *"
|
||||
|
||||
@ -345,6 +345,31 @@ def test_update_schedule_state_executes() -> None:
|
||||
assert db.executed
|
||||
|
||||
|
||||
def test_list_schedule_states_returns_valid_rows() -> None:
|
||||
db = DummyDB()
|
||||
now = datetime.now()
|
||||
db.rows = [
|
||||
{
|
||||
"task_name": "schedule.nightly",
|
||||
"cron_expr": "30 4 * * *",
|
||||
"last_started_at": now,
|
||||
"last_finished_at": now,
|
||||
"last_status": "ok",
|
||||
"last_error": None,
|
||||
"last_duration_ms": 10,
|
||||
"next_run_at": None,
|
||||
},
|
||||
{"task_name": None, "cron_expr": "bad"},
|
||||
]
|
||||
storage = Storage(db)
|
||||
|
||||
states = storage.list_schedule_states()
|
||||
|
||||
assert len(states) == 1
|
||||
assert states[0].task_name == "schedule.nightly"
|
||||
assert states[0].last_status == "ok"
|
||||
|
||||
|
||||
def test_record_cluster_state_executes() -> None:
|
||||
db = DummyDB()
|
||||
storage = Storage(db)
|
||||
@ -359,8 +384,27 @@ def test_prune_cluster_state_skips_zero() -> None:
|
||||
assert not db.executed
|
||||
|
||||
|
||||
def test_prune_cluster_state_executes_with_positive_keep() -> None:
|
||||
db = DummyDB()
|
||||
storage = Storage(db)
|
||||
|
||||
storage.prune_cluster_state(3)
|
||||
|
||||
assert db.executed[-1][1] == (3,)
|
||||
|
||||
|
||||
def test_latest_cluster_state_parses_json() -> None:
|
||||
db = DummyDB(row={"snapshot": "{\"ok\": true}", "created_at": datetime.now()})
|
||||
storage = Storage(db)
|
||||
snapshot = storage.latest_cluster_state()
|
||||
assert snapshot == {"ok": True}
|
||||
|
||||
|
||||
def test_latest_cluster_state_handles_empty_and_native_snapshots() -> None:
|
||||
assert Storage(DummyDB(row=None)).latest_cluster_state() is None
|
||||
assert Storage(DummyDB(row={"snapshot": {"ok": True}, "created_at": datetime.now()})).latest_cluster_state() == {"ok": True}
|
||||
|
||||
|
||||
def test_latest_cluster_state_rejects_bad_snapshot_payloads() -> None:
|
||||
assert Storage(DummyDB(row={"snapshot": "{bad", "created_at": datetime.now()})).latest_cluster_state() is None
|
||||
assert Storage(DummyDB(row={"snapshot": 42, "created_at": datetime.now()})).latest_cluster_state() is None
|
||||
|
||||
@ -81,6 +81,19 @@ def test_safe_error_detail_timeout() -> None:
|
||||
assert safe_error_detail(exc, "fallback") == "timeout"
|
||||
|
||||
|
||||
def test_safe_error_detail_http_status_without_message() -> None:
|
||||
request = httpx.Request("GET", "https://example.com")
|
||||
response = httpx.Response(503, json={"detail": "hidden"}, request=request)
|
||||
exc = httpx.HTTPStatusError("bad", request=request, response=response)
|
||||
|
||||
assert safe_error_detail(exc, "fallback") == "http 503"
|
||||
|
||||
|
||||
def test_safe_error_detail_fallbacks_for_empty_runtime_and_generic() -> None:
|
||||
assert safe_error_detail(RuntimeError(" "), "fallback") == "fallback"
|
||||
assert safe_error_detail(ValueError("internal"), "fallback") == "fallback"
|
||||
|
||||
|
||||
def test_extract_bearer_token() -> None:
|
||||
request = DummyRequest({"Authorization": "Bearer token123"})
|
||||
assert extract_bearer_token(request) == "token123"
|
||||
|
||||
0
tests/unit/__init__.py
Normal file
0
tests/unit/__init__.py
Normal file
0
tests/unit/app/__init__.py
Normal file
0
tests/unit/app/__init__.py
Normal file
27
tests/unit/app/app_route_helpers.py
Normal file
27
tests/unit/app/app_route_helpers.py
Normal file
@ -0,0 +1,27 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import dataclasses
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from ariadne.auth.keycloak import AuthContext
|
||||
|
||||
import ariadne.app as app_module
|
||||
|
||||
def _client(monkeypatch, ctx: AuthContext) -> TestClient:
|
||||
monkeypatch.setattr(app_module.authenticator, "authenticate", lambda token: ctx)
|
||||
monkeypatch.setattr(app_module.provisioning, "start", lambda: None)
|
||||
monkeypatch.setattr(app_module.scheduler, "start", lambda: None)
|
||||
monkeypatch.setattr(app_module.provisioning, "stop", lambda: None)
|
||||
monkeypatch.setattr(app_module.scheduler, "stop", lambda: None)
|
||||
monkeypatch.setattr(app_module.portal_db, "close", lambda: None)
|
||||
monkeypatch.setattr(app_module.ariadne_db, "close", lambda: None)
|
||||
monkeypatch.setattr(app_module.storage, "record_event", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr(app_module.storage, "record_task_run", lambda *args, **kwargs: None)
|
||||
return TestClient(app_module.app)
|
||||
|
||||
__all__ = [name for name in globals() if not name.startswith("__")]
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user