Compare commits: main...feature/pi (42 commits)

| SHA1 |
|---|
| 9a86c350dd |
| d342053196 |
| c3cca8ad9a |
| 9103cd22f2 |
| 094d202803 |
| 411bc6b90d |
| 26b8f23426 |
| 629df65c7b |
| e5a824e4e1 |
| 6815a67c1f |
| deefdb53ad |
| 4e4c310cd4 |
| df79cad1c3 |
| b3d8b13f39 |
| a23b6a4b93 |
| 38abbd9fe1 |
| ac12a9bfed |
| 8a371e1267 |
| f25186ef7e |
| a01dc0813a |
| 609cfcb696 |
| 75a992b829 |
| a87a5f7bff |
| a1c8a99866 |
| 7b3dfa335b |
| e1bba18b52 |
| 52882f1bb5 |
| 5128741c53 |
| 96f923ae4c |
| 95bc3953d1 |
| f4e921bb33 |
| 616c6308b1 |
| d9b30d6c5b |
| 7c337ad5a1 |
| 3823b68ee2 |
| 40de2b59a5 |
| 5483c04bb3 |
| 64b4f14018 |
| 166020ca1d |
| 60446ee830 |
| c38b6c5e27 |
| 9419c4b26b |
Jenkinsfile (vendored): 47 changed lines
@@ -23,6 +23,8 @@ spec:
     environment {
         PIP_DISABLE_PIP_VERSION_CHECK = '1'
         PYTHONUNBUFFERED = '1'
+        SUITE_NAME = 'titan-iac'
+        PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
     }
     stages {
         stage('Checkout') {
@@ -35,9 +37,36 @@ spec:
                 sh 'pip install --no-cache-dir -r ci/requirements.txt'
             }
         }
-        stage('Glue tests') {
+        stage('Run quality gate') {
             steps {
-                sh 'pytest -q ci/tests/glue'
+                sh '''
+                    set -eu
+                    mkdir -p build
+                    set +e
+                    python3 -m testing.quality_gate --profile jenkins --build-dir build
+                    quality_gate_rc=$?
+                    set -e
+                    printf '%s\n' "${quality_gate_rc}" > build/quality-gate.rc
+                '''
             }
         }
+        stage('Publish test metrics') {
+            steps {
+                sh '''
+                    set -eu
+                    export JUNIT_GLOB='build/junit-*.xml'
+                    export QUALITY_GATE_EXIT_CODE_PATH='build/quality-gate.rc'
+                    export QUALITY_GATE_SUMMARY_PATH='build/quality-gate-summary.json'
+                    python3 ci/scripts/publish_test_metrics.py
+                '''
+            }
+        }
+        stage('Enforce quality gate') {
+            steps {
+                sh '''
+                    set -eu
+                    test "$(cat build/quality-gate.rc 2>/dev/null || echo 1)" -eq 0
+                '''
+            }
+        }
         stage('Resolve Flux branch') {
@@ -74,4 +103,18 @@ spec:
             }
         }
     }
+    post {
+        always {
+            script {
+                if (fileExists('build/junit-unit.xml') || fileExists('build/junit-glue.xml')) {
+                    try {
+                        junit allowEmptyResults: true, testResults: 'build/junit-*.xml'
+                    } catch (Throwable err) {
+                        echo "junit step unavailable: ${err.class.simpleName}"
+                    }
+                }
+            }
+            archiveArtifacts artifacts: 'build/**', allowEmptyArchive: true, fingerprint: true
+        }
+    }
 }
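Note the handshake here: the gate runs under `set +e` so its exit code is captured to build/quality-gate.rc rather than failing the stage, metrics are published unconditionally, and only the final stage enforces the result. A minimal Python sketch of that last enforcement step (a standalone illustration, not part of the change; it mirrors the `cat ... || echo 1` default in the 'Enforce quality gate' shell above):

    from pathlib import Path

    rc_file = Path("build/quality-gate.rc")
    try:
        # A missing or malformed rc file counts as a failure, same as the shell fallback.
        rc = int(rc_file.read_text(encoding="utf-8").strip())
    except (FileNotFoundError, ValueError):
        rc = 1
    if rc != 0:
        raise SystemExit(f"quality gate failed with rc={rc}")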
ci/requirements.txt

@@ -1,4 +1,7 @@
 pytest==8.3.4
+pytest-cov==6.0.0
+coverage==7.6.10
 kubernetes==30.1.0
 PyYAML==6.0.2
 requests==2.32.3
+ruff==0.8.4
ci/scripts/publish_test_metrics.py (new file, 328 lines)
@@ -0,0 +1,328 @@
#!/usr/bin/env python3
"""Publish titan-iac quality-gate results to Pushgateway."""

from __future__ import annotations

import json
import os
from glob import glob
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET

from ci.scripts import publish_test_metrics_quality as _quality_helpers

CANONICAL_CHECKS = _quality_helpers.CANONICAL_CHECKS
_build_check_statuses = _quality_helpers._build_check_statuses
_combine_statuses = _quality_helpers._combine_statuses
_infer_sonarqube_status = _quality_helpers._infer_sonarqube_status
_infer_source_lines_over_500 = _quality_helpers._infer_source_lines_over_500
_infer_supply_chain_status = _quality_helpers._infer_supply_chain_status
_infer_workspace_coverage_percent = _quality_helpers._infer_workspace_coverage_percent
_load_optional_json = _quality_helpers._load_optional_json
_normalize_result_status = _quality_helpers._normalize_result_status


def _escape_label(value: str) -> str:
    """Escape a Prometheus label value without changing its content."""
    return value.replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')


def _label_str(labels: dict[str, str]) -> str:
    """Render a stable Prometheus label set from a mapping."""
    parts = [f'{key}="{_escape_label(val)}"' for key, val in labels.items() if val]
    return "{" + ",".join(parts) + "}" if parts else ""


def _read_text(url: str) -> str:
    """Fetch a plain-text response body from the given URL."""
    with urllib.request.urlopen(url, timeout=10) as response:
        return response.read().decode("utf-8")


def _post_text(url: str, payload: str) -> None:
    """PUT a plain-text payload and fail on any 4xx/5xx response."""
    request = urllib.request.Request(
        url,
        data=payload.encode("utf-8"),
        method="PUT",
        headers={"Content-Type": "text/plain"},
    )
    with urllib.request.urlopen(request, timeout=10) as response:
        if response.status >= 400:
            raise RuntimeError(f"push failed with status={response.status}")


def _parse_junit(path: str) -> dict[str, int]:
    """Parse a JUnit XML file into aggregate test counters."""
    if not os.path.exists(path):
        return {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}

    tree = ET.parse(path)
    root = tree.getroot()
    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}

    suites: list[ET.Element]
    if root.tag == "testsuite":
        suites = [root]
    elif root.tag == "testsuites":
        suites = [elem for elem in root if elem.tag == "testsuite"]
    else:
        suites = []

    for suite in suites:
        for key in totals:
            raw_value = suite.attrib.get(key, "0")
            try:
                totals[key] += int(float(raw_value))
            except ValueError:
                totals[key] += 0
    return totals


def _collect_junit_totals(pattern: str) -> dict[str, int]:
    """Sum JUnit counters across every XML file matching the pattern."""
    totals = {"tests": 0, "failures": 0, "errors": 0, "skipped": 0}
    for path in sorted(glob(pattern)):
        parsed = _parse_junit(path)
        for key in totals:
            totals[key] += parsed[key]
    return totals


def _collect_junit_cases(pattern: str) -> list[tuple[str, str]]:
    """Collect individual JUnit test-case statuses for flaky-test trend panels."""
    cases: list[tuple[str, str]] = []
    for path in sorted(glob(pattern)):
        if not os.path.exists(path):
            continue
        root = ET.parse(path).getroot()
        suites: list[ET.Element]
        if root.tag == "testsuite":
            suites = [root]
        elif root.tag == "testsuites":
            suites = [elem for elem in root if elem.tag == "testsuite"]
        else:
            suites = []
        for suite in suites:
            for test_case in suite.findall("testcase"):
                case_name = test_case.attrib.get("name", "").strip()
                class_name = test_case.attrib.get("classname", "").strip()
                if not case_name:
                    continue
                full_name = f"{class_name}.{case_name}" if class_name else case_name
                status = "passed"
                if test_case.find("failure") is not None or test_case.find("error") is not None:
                    status = "failed"
                elif test_case.find("skipped") is not None:
                    status = "skipped"
                cases.append((full_name, status))
    return cases


def _read_exit_code(path: str) -> int:
    """Read the quality-gate exit code, defaulting to failure if missing."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return int(handle.read().strip())
    except (FileNotFoundError, ValueError):
        return 1


def _load_summary(path: str) -> dict:
    """Load the JSON quality-gate summary, returning an empty mapping on error."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        return {}


def _summary_float(summary: dict, key: str) -> float:
    """Extract a float-like value from the summary, defaulting to 0.0."""
    value = summary.get(key)
    if isinstance(value, (int, float)):
        return float(value)
    return 0.0


def _summary_int(summary: dict, key: str) -> int:
    """Extract an int-like value from the summary, defaulting to 0."""
    value = summary.get(key)
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        return int(value)
    return 0


def _fetch_existing_counter(pushgateway_url: str, metric: str, labels: dict[str, str]) -> float:
    """Return the current counter value for a labeled metric if present."""
    text = _read_text(f"{pushgateway_url.rstrip('/')}/metrics")
    for line in text.splitlines():
        if not line.startswith(metric + "{"):
            continue
        if any(f'{key}="{value}"' not in line for key, value in labels.items()):
            continue
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            return float(parts[1])
        except ValueError:
            return 0.0
    return 0.0


def _build_payload(
    suite: str,
    status: str,
    tests: dict[str, int],
    test_cases: list[tuple[str, str]],
    ok_count: int,
    failed_count: int,
    branch: str,
    build_number: str,
    summary: dict | None = None,
    workspace_line_coverage_percent: float = 0.0,
    source_lines_over_500: int = 0,
    check_statuses: dict[str, str] | None = None,
) -> str:
    """Build the Pushgateway payload for the current suite run."""
    passed = max(tests["tests"] - tests["failures"] - tests["errors"] - tests["skipped"], 0)
    build_labels = _label_str(
        {
            "suite": suite,
            "branch": branch or "unknown",
            "build_number": build_number or "unknown",
        }
    )
    lines = [
        "# TYPE platform_quality_gate_runs_total counter",
        f'platform_quality_gate_runs_total{{suite="{suite}",status="ok"}} {ok_count}',
        f'platform_quality_gate_runs_total{{suite="{suite}",status="failed"}} {failed_count}',
        "# TYPE titan_iac_quality_gate_tests_total gauge",
        f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="passed"}} {passed}',
        f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="failed"}} {tests["failures"]}',
        f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="error"}} {tests["errors"]}',
        f'titan_iac_quality_gate_tests_total{{suite="{suite}",result="skipped"}} {tests["skipped"]}',
        "# TYPE titan_iac_quality_gate_run_status gauge",
        f'titan_iac_quality_gate_run_status{{suite="{suite}",status="ok"}} {1 if status == "ok" else 0}',
        f'titan_iac_quality_gate_run_status{{suite="{suite}",status="failed"}} {1 if status == "failed" else 0}',
        "# TYPE titan_iac_quality_gate_build_info gauge",
        f"titan_iac_quality_gate_build_info{build_labels} 1",
        "# TYPE platform_quality_gate_workspace_line_coverage_percent gauge",
        f'platform_quality_gate_workspace_line_coverage_percent{{suite="{suite}"}} {workspace_line_coverage_percent:.3f}',
        "# TYPE platform_quality_gate_source_lines_over_500_total gauge",
        f'platform_quality_gate_source_lines_over_500_total{{suite="{suite}"}} {source_lines_over_500}',
    ]
    if check_statuses:
        lines.append("# TYPE titan_iac_quality_gate_checks_total gauge")
        for check_name in CANONICAL_CHECKS:
            check_status = check_statuses.get(check_name, "not_applicable")
            lines.append(
                f'titan_iac_quality_gate_checks_total{{suite="{suite}",check="{_escape_label(check_name)}",result="{_escape_label(check_status)}"}} 1'
            )
    lines.append("# TYPE platform_quality_gate_test_case_result gauge")
    if test_cases:
        for test_name, test_status in test_cases:
            lines.append(
                f'platform_quality_gate_test_case_result{{suite="{suite}",test="{_escape_label(test_name)}",status="{_escape_label(test_status)}"}} 1'
            )
    else:
        lines.append(
            f'platform_quality_gate_test_case_result{{suite="{suite}",test="__no_test_cases__",status="skipped"}} 1'
        )
    return "\n".join(lines) + "\n"


def main() -> int:
    """Publish the quality-gate metrics and print a compact run summary."""
    suite = os.getenv("SUITE_NAME", "titan_iac")
    pushgateway_url = os.getenv("PUSHGATEWAY_URL", "http://platform-quality-gateway.monitoring.svc.cluster.local:9091")
    job_name = os.getenv("QUALITY_GATE_JOB_NAME", "platform-quality-ci")
    junit_glob = os.getenv("JUNIT_GLOB", os.getenv("JUNIT_PATH", "build/junit-*.xml"))
    exit_code_path = os.getenv("QUALITY_GATE_EXIT_CODE_PATH", os.getenv("GLUE_EXIT_CODE_PATH", "build/quality-gate.rc"))
    summary_path = os.getenv("QUALITY_GATE_SUMMARY_PATH", "build/quality-gate-summary.json")
    branch = os.getenv("BRANCH_NAME", os.getenv("GIT_BRANCH", ""))
    build_number = os.getenv("BUILD_NUMBER", "")

    tests = _collect_junit_totals(junit_glob)
    test_cases = _collect_junit_cases(junit_glob)
    exit_code = _read_exit_code(exit_code_path)
    status = "ok" if exit_code == 0 else "failed"
    summary = _load_summary(summary_path)
    workspace_line_coverage_percent = _summary_float(summary, "workspace_line_coverage_percent")
    if workspace_line_coverage_percent <= 0:
        workspace_line_coverage_percent = _infer_workspace_coverage_percent(summary, "build/coverage-unit.xml")
    source_lines_over_500 = _summary_int(summary, "source_lines_over_500")
    if source_lines_over_500 <= 0:
        source_lines_over_500 = _infer_source_lines_over_500(summary)
    sonarqube_report = _load_optional_json(os.getenv("QUALITY_GATE_SONARQUBE_REPORT", "build/sonarqube-quality-gate.json"))
    supply_chain_report = _load_optional_json(os.getenv("QUALITY_GATE_IRONBANK_REPORT", "build/ironbank-compliance.json"))
    supply_chain_required = os.getenv("QUALITY_GATE_IRONBANK_REQUIRED", "0").strip().lower() in {"1", "true", "yes", "on"}
    check_statuses = _build_check_statuses(
        summary=summary,
        tests=tests,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        sonarqube_report=sonarqube_report,
        supply_chain_report=supply_chain_report,
        supply_chain_required=supply_chain_required,
    )

    ok_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "ok"},
        )
    )
    failed_count = int(
        _fetch_existing_counter(
            pushgateway_url,
            "platform_quality_gate_runs_total",
            {"job": job_name, "suite": suite, "status": "failed"},
        )
    )
    if status == "ok":
        ok_count += 1
    else:
        failed_count += 1

    payload = _build_payload(
        suite=suite,
        status=status,
        tests=tests,
        test_cases=test_cases,
        ok_count=ok_count,
        failed_count=failed_count,
        branch=branch,
        build_number=build_number,
        summary=summary,
        workspace_line_coverage_percent=workspace_line_coverage_percent,
        source_lines_over_500=source_lines_over_500,
        check_statuses=check_statuses,
    )
    push_url = f"{pushgateway_url.rstrip('/')}/metrics/job/{job_name}/suite/{suite}"
    _post_text(push_url, payload)

    summary = {
        "suite": suite,
        "status": status,
        "tests_total": tests["tests"],
        "tests_failed": tests["failures"],
        "tests_error": tests["errors"],
        "tests_skipped": tests["skipped"],
        "ok_count": ok_count,
        "failed_count": failed_count,
        "checks_recorded": len(check_statuses),
        "workspace_line_coverage_percent": workspace_line_coverage_percent,
        "source_lines_over_500": source_lines_over_500,
    }
    print(json.dumps(summary, sort_keys=True))
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
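To make the exposition format concrete, here is an illustrative fragment of what _build_payload emits for a hypothetical run with 12 tests (1 failed, 2 skipped) and a passing gate; the numbers are invented, the line shapes follow directly from the f-strings above (passed = 12 - 1 - 0 - 2 = 9):

    # TYPE platform_quality_gate_runs_total counter
    platform_quality_gate_runs_total{suite="titan-iac",status="ok"} 7
    platform_quality_gate_runs_total{suite="titan-iac",status="failed"} 2
    # TYPE titan_iac_quality_gate_tests_total gauge
    titan_iac_quality_gate_tests_total{suite="titan-iac",result="passed"} 9
    titan_iac_quality_gate_tests_total{suite="titan-iac",result="failed"} 1
    titan_iac_quality_gate_tests_total{suite="titan-iac",result="error"} 0
    titan_iac_quality_gate_tests_total{suite="titan-iac",result="skipped"} 2
    # TYPE titan_iac_quality_gate_run_status gauge
    titan_iac_quality_gate_run_status{suite="titan-iac",status="ok"} 1
    titan_iac_quality_gate_run_status{suite="titan-iac",status="failed"} 0

Because Pushgateway only stores the last pushed value per group, the script first reads the existing runs_total counters back from /metrics and re-pushes them incremented, which is how a counter survives across builds.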
ci/scripts/publish_test_metrics_quality.py (new file, 200 lines)
@@ -0,0 +1,200 @@
#!/usr/bin/env python3
"""Quality/status helpers for publish_test_metrics."""

from __future__ import annotations

import json
from pathlib import Path
import xml.etree.ElementTree as ET

SUCCESS_STATUSES = {"ok", "pass", "passed", "success", "compliant"}
NOT_APPLICABLE_STATUSES = {"not_applicable", "n/a", "na", "none", "skipped"}
FAILED_STATUSES = {"failed", "fail", "error", "errors", "warn", "warning", "red"}

CANONICAL_CHECKS = [
    "tests",
    "coverage",
    "loc",
    "docs_naming",
    "gate_glue",
    "sonarqube",
    "supply_chain",
]


def _infer_workspace_coverage_percent(summary: dict, default_xml: str) -> float:
    """Infer workspace line coverage from quality summary coverage XML metadata."""
    results = summary.get("results", []) if isinstance(summary, dict) else []
    coverage_xml = default_xml
    for result in results:
        if not isinstance(result, dict):
            continue
        if str(result.get("name") or "").strip().lower() != "coverage":
            continue
        candidate = str(result.get("coverage_xml") or "").strip()
        if candidate:
            coverage_xml = candidate
            break
    xml_path = Path(coverage_xml)
    if not xml_path.exists():
        return 0.0
    try:
        root = ET.parse(xml_path).getroot()
        line_rate = root.attrib.get("line-rate")
        if line_rate is None:
            return 0.0
        return float(line_rate) * 100.0
    except (ET.ParseError, OSError, ValueError):
        return 0.0


def _infer_source_lines_over_500(summary: dict) -> int:
    """Infer over-limit source file count from hygiene issue payloads."""
    results = summary.get("results", []) if isinstance(summary, dict) else []
    for result in results:
        if not isinstance(result, dict):
            continue
        if str(result.get("name") or "").strip().lower() not in {"hygiene", "loc", "smell"}:
            continue
        issues = result.get("issues")
        if not isinstance(issues, list):
            continue
        return sum(1 for item in issues if isinstance(item, str) and item.startswith("file exceeds"))
    return 0


def _normalize_result_status(value: str | None, default: str = "failed") -> str:
    """Map arbitrary check status text into canonical check result buckets."""
    if not value:
        return default
    normalized = value.strip().lower()
    if normalized in SUCCESS_STATUSES:
        return "ok"
    if normalized in NOT_APPLICABLE_STATUSES:
        return "not_applicable"
    if normalized in FAILED_STATUSES:
        return "failed"
    return default


def _load_optional_json(path: str | None) -> dict:
    """Load an optional JSON report file, returning an empty object when absent."""
    if not path:
        return {}
    candidate = Path(path)
    if not candidate.exists():
        return {}
    try:
        return json.loads(candidate.read_text(encoding="utf-8"))
    except json.JSONDecodeError:
        return {}


def _combine_statuses(statuses: list[str]) -> str:
    """Roll up many check statuses into one canonical result."""
    if not statuses:
        return "not_applicable"
    if any(status == "failed" for status in statuses):
        return "failed"
    if all(status == "not_applicable" for status in statuses):
        return "not_applicable"
    if all(status in {"ok", "not_applicable"} for status in statuses):
        return "ok"
    return "failed"


def _infer_sonarqube_status(report: dict) -> str:
    """Infer canonical SonarQube check status from its JSON report payload."""
    if not report:
        return "not_applicable"
    status = (
        report.get("projectStatus", {}).get("status")
        or report.get("qualityGate", {}).get("status")
        or report.get("status")
    )
    return _normalize_result_status(str(status) if status is not None else None, default="failed")


def _infer_supply_chain_status(report: dict, required: bool) -> str:
    """Infer canonical supply-chain status from IronBank/artifact report payload."""
    if not report:
        return "failed" if required else "not_applicable"
    compliant = report.get("compliant")
    if isinstance(compliant, bool):
        return "ok" if compliant else "failed"
    status = report.get("status")
    if status is None:
        return "failed" if required else "not_applicable"
    normalized = _normalize_result_status(str(status), default="failed")
    if normalized == "not_applicable" and required:
        return "failed"
    return normalized


def _build_check_statuses(
    summary: dict | None,
    tests: dict[str, int],
    workspace_line_coverage_percent: float,
    source_lines_over_500: int,
    sonarqube_report: dict,
    supply_chain_report: dict,
    supply_chain_required: bool,
) -> dict[str, str]:
    """Generate the canonical quality-check status map for dashboarding."""
    raw_results = summary.get("results", []) if isinstance(summary, dict) else []
    status_by_name: dict[str, str] = {}
    for result in raw_results:
        if not isinstance(result, dict):
            continue
        check_name = str(result.get("name") or "").strip().lower()
        if not check_name:
            continue
        status_by_name[check_name] = _normalize_result_status(result.get("status"), default="failed")

    tests_status = status_by_name.get("tests")
    if not tests_status:
        candidate_keys = ["unit", "integration", "e2e", "pytest", "test", "tests"]
        candidates = [status_by_name[key] for key in candidate_keys if key in status_by_name]
        if candidates:
            tests_status = _combine_statuses(candidates)
        elif tests["tests"] > 0:
            tests_status = "ok" if (tests["failures"] + tests["errors"]) == 0 else "failed"
        else:
            tests_status = "not_applicable"

    coverage_status = status_by_name.get("coverage")
    if not coverage_status:
        if workspace_line_coverage_percent > 0:
            coverage_status = "ok" if workspace_line_coverage_percent >= 95.0 else "failed"
        else:
            coverage_status = "not_applicable"

    loc_status = status_by_name.get("loc")
    if not loc_status:
        loc_status = "ok" if source_lines_over_500 == 0 else "failed"

    docs_naming_status = status_by_name.get("docs_naming")
    if not docs_naming_status:
        candidates = [status_by_name[key] for key in ["docs", "hygiene", "smell", "lint", "naming"] if key in status_by_name]
        docs_naming_status = _combine_statuses(candidates) if candidates else "not_applicable"

    gate_glue_status = status_by_name.get("gate_glue")
    if not gate_glue_status:
        candidates = [status_by_name[key] for key in ["gate_glue", "glue", "gate"] if key in status_by_name]
        gate_glue_status = _combine_statuses(candidates) if candidates else "not_applicable"

    sonarqube_status = status_by_name.get("sonarqube") or _infer_sonarqube_status(sonarqube_report)
    supply_chain_status = status_by_name.get("supply_chain") or _infer_supply_chain_status(
        supply_chain_report,
        required=supply_chain_required,
    )

    return {
        "tests": tests_status,
        "coverage": coverage_status,
        "loc": loc_status,
        "docs_naming": docs_naming_status,
        "gate_glue": gate_glue_status,
        "sonarqube": sonarqube_status,
        "supply_chain": supply_chain_status,
    }
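A few concrete input/output pairs for the normalization and rollup helpers, which follow directly from the status sets and branches above:

    >>> _normalize_result_status("PASS")
    'ok'
    >>> _normalize_result_status("warning")
    'failed'
    >>> _normalize_result_status(None, default="not_applicable")
    'not_applicable'
    >>> _combine_statuses(["ok", "not_applicable"])
    'ok'
    >>> _combine_statuses(["ok", "failed", "not_applicable"])
    'failed'

In other words, a single failed sub-check fails the rollup, a mix of ok and not_applicable stays ok, and unknown status text falls through to the caller-supplied default (failure, in every call site above).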
ci/tests/glue/config.yaml

@@ -1,16 +1,52 @@
-max_success_age_hours: 48
-allow_suspended:
-  - bstein-dev-home/vaultwarden-cred-sync
-  - comms/othrys-room-reset
-  - comms/pin-othrys-invite
-  - comms/seed-othrys-room
-  - finance/firefly-user-sync
-  - health/wger-admin-ensure
-  - health/wger-user-sync
-  - mailu-mailserver/mailu-sync-nightly
-  - nextcloud/nextcloud-mail-sync
 ariadne_schedule_tasks:
-  - schedule.mailu_sync
-  - schedule.nextcloud_sync
-  - schedule.vaultwarden_sync
-  - schedule.wger_admin
+  - task: schedule.mailu_sync
+    check_last_success: false
+  - task: schedule.nextcloud_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.nextcloud_cron
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.nextcloud_maintenance
+    check_last_success: false
+  - task: schedule.vaultwarden_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.wger_user_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.wger_admin
+    check_last_success: false
+  - task: schedule.firefly_user_sync
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.firefly_cron
+    check_last_success: false
+  - task: schedule.vault_k8s_auth
+    check_last_success: false
+  - task: schedule.vault_oidc
+    check_last_success: false
+  - task: schedule.comms_guest_name
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.comms_pin_invite
+    check_last_success: false
+  - task: schedule.comms_reset_room
+    check_last_success: false
+  - task: schedule.comms_seed_room
+    check_last_success: true
+    max_success_age_hours: 48
+  - task: schedule.pod_cleaner
+    check_last_success: true
+    max_success_age_hours: 6
+  - task: schedule.opensearch_prune
+    check_last_success: false
+  - task: schedule.image_sweeper
+    check_last_success: true
+    max_success_age_hours: 18
+  - task: schedule.metis_k3s_token_sync
+    check_last_success: true
+    max_success_age_hours: 12
+  - task: schedule.platform_quality_suite_probe
+    check_last_success: true
+    max_success_age_hours: 2
ci/tests/glue/test_ariadne_schedules.py (new file, 88 lines)
@@ -0,0 +1,88 @@
from __future__ import annotations

import os
from datetime import datetime, timezone
from pathlib import Path

import requests
import yaml

CONFIG_PATH = Path(__file__).with_name("config.yaml")


def _load_config() -> dict:
    with CONFIG_PATH.open("r", encoding="utf-8") as handle:
        return yaml.safe_load(handle) or {}


def _query(promql: str) -> list[dict]:
    vm_url = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
    response = requests.get(f"{vm_url}/api/v1/query", params={"query": promql}, timeout=10)
    response.raise_for_status()
    payload = response.json()
    return payload.get("data", {}).get("result", [])


def _expected_tasks() -> list[dict]:
    cfg = _load_config()
    tasks = cfg.get("ariadne_schedule_tasks", [])
    assert tasks, "No Ariadne schedule tasks configured"
    return tasks


def _tracked_tasks(tasks: list[dict]) -> list[dict]:
    tracked = [item for item in tasks if item.get("check_last_success")]
    assert tracked, "No Ariadne schedule tasks are marked for success tracking"
    return tracked


def _task_regex(tasks: list[dict]) -> str:
    return "|".join(item["task"] for item in tasks)


def test_ariadne_schedule_series_exist():
    tasks = _expected_tasks()
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing next-run metrics for: {', '.join(missing)}"


def test_ariadne_schedule_recent_success():
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-success metrics for: {', '.join(missing)}"

    now = datetime.now(timezone.utc)
    age_by_task = {
        item.get("metric", {}).get("task"): (now - datetime.fromtimestamp(float(item["value"][1]), tz=timezone.utc)).total_seconds() / 3600
        for item in series
    }
    too_old = [
        f"{task} ({age_by_task[task]:.1f}h > {item['max_success_age_hours']}h)"
        for item in tasks
        if (task := item["task"]) in age_by_task and age_by_task[task] > float(item["max_success_age_hours"])
    ]
    assert not too_old, "Ariadne schedules are stale: " + ", ".join(too_old)


def test_ariadne_schedule_last_status_present_and_boolean():
    tasks = _tracked_tasks(_expected_tasks())
    selector = _task_regex(tasks)
    series = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
    seen = {item.get("metric", {}).get("task") for item in series}
    missing = [item["task"] for item in tasks if item["task"] not in seen]
    assert not missing, f"Missing last-status metrics for: {', '.join(missing)}"

    invalid = []
    for item in series:
        task = item.get("metric", {}).get("task")
        value = float(item["value"][1])
        if value not in (0.0, 1.0):
            invalid.append(f"{task}={value}")
    assert not invalid, f"Unexpected Ariadne last-status values: {', '.join(invalid)}"
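For reference, _task_regex builds a plain alternation, and each item _query returns follows the standard Prometheus/VictoriaMetrics instant-query shape (the value below is illustrative):

    >>> _task_regex([{"task": "schedule.mailu_sync"}, {"task": "schedule.pod_cleaner"}])
    'schedule.mailu_sync|schedule.pod_cleaner'

    # One result item, as consumed by the assertions above:
    {"metric": {"task": "schedule.mailu_sync"}, "value": [1735689600.0, "1"]}

The dots in task names are regex metacharacters inside the `task=~` selector, but since an unescaped dot also matches a literal dot, the selector remains correct for these names; it would only over-match if another task differed solely at a dot position.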
@@ -1,46 +0,0 @@
from __future__ import annotations

from datetime import datetime, timezone
from pathlib import Path

import yaml
from kubernetes import client, config

CONFIG_PATH = Path(__file__).with_name("config.yaml")


def _load_config() -> dict:
    with CONFIG_PATH.open("r", encoding="utf-8") as handle:
        return yaml.safe_load(handle) or {}


def _load_kube():
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config()


def test_glue_cronjobs_recent_success():
    cfg = _load_config()
    max_age_hours = int(cfg.get("max_success_age_hours", 48))
    allow_suspended = set(cfg.get("allow_suspended", []))

    _load_kube()
    batch = client.BatchV1Api()
    cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items

    assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"

    now = datetime.now(timezone.utc)
    for cronjob in cronjobs:
        name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}"
        if cronjob.spec.suspend:
            assert name in allow_suspended, f"{name} is suspended but not in allow_suspended"
            continue

        last_success = cronjob.status.last_successful_time
        assert last_success is not None, f"{name} has no lastSuccessfulTime"
        age_hours = (now - last_success).total_seconds() / 3600
        assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago"
@@ -23,26 +23,45 @@ def _query(promql: str) -> list[dict]:
     return payload.get("data", {}).get("result", [])


-def test_glue_metrics_present():
-    series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}')
-    assert series, "No glue cronjob label series found"
+def _expected_tasks() -> list[dict]:
+    cfg = _load_config()
+    tasks = cfg.get("ariadne_schedule_tasks", [])
+    assert tasks, "No Ariadne schedule tasks configured"
+    return tasks


-def test_glue_metrics_success_join():
-    query = (
-        "kube_cronjob_status_last_successful_time "
-        'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
-    )
-    series = _query(query)
-    assert series, "No glue cronjob last success series found"
+def _tracked_tasks(tasks: list[dict]) -> list[dict]:
+    tracked = [item for item in tasks if item.get("check_last_success")]
+    assert tracked, "No Ariadne schedule tasks are marked for success tracking"
+    return tracked
+
+
+def _task_regex(tasks: list[dict]) -> str:
+    return "|".join(item["task"] for item in tasks)


 def test_ariadne_schedule_metrics_present():
-    cfg = _load_config()
-    expected = cfg.get("ariadne_schedule_tasks", [])
-    if not expected:
-        return
-    series = _query("ariadne_schedule_next_run_timestamp_seconds")
-    tasks = {item.get("metric", {}).get("task") for item in series}
-    missing = [task for task in expected if task not in tasks]
+    tasks = _expected_tasks()
+    selector = _task_regex(tasks)
+    series = _query(f'ariadne_schedule_next_run_timestamp_seconds{{task=~"{selector}"}}')
+    seen = {item.get("metric", {}).get("task") for item in series}
+    missing = [item["task"] for item in tasks if item["task"] not in seen]
     assert not missing, f"Missing Ariadne schedule metrics for: {', '.join(missing)}"
+
+
+def test_ariadne_schedule_success_and_status_metrics_present():
+    tasks = _tracked_tasks(_expected_tasks())
+    selector = _task_regex(tasks)
+
+    success = _query(f'ariadne_schedule_last_success_timestamp_seconds{{task=~"{selector}"}}')
+    status = _query(f'ariadne_schedule_last_status{{task=~"{selector}"}}')
+
+    success_tasks = {item.get("metric", {}).get("task") for item in success}
+    status_tasks = {item.get("metric", {}).get("task") for item in status}
+    expected = {item["task"] for item in tasks}
+
+    missing_success = sorted(expected - success_tasks)
+    missing_status = sorted(expected - status_tasks)
+
+    assert not missing_success, f"Missing Ariadne success metrics for: {', '.join(missing_success)}"
+    assert not missing_status, f"Missing Ariadne status metrics for: {', '.join(missing_status)}"
@@ -25,6 +25,7 @@ resources:
   - mailu/kustomization.yaml
   - jenkins/kustomization.yaml
   - ai-llm/kustomization.yaml
+  - typhon/kustomization.yaml
  - nextcloud/kustomization.yaml
  - nextcloud-mail-sync/kustomization.yaml
  - outline/kustomization.yaml
@@ -0,0 +1,29 @@
# clusters/atlas/flux-system/applications/typhon/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: typhon
  namespace: flux-system
spec:
  interval: 10m
  path: ./services/typhon
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  targetNamespace: climate
  dependsOn:
    - name: vault
    - name: vault-csi
    - name: monitoring
  healthChecks:
    - apiVersion: apps/v1
      kind: Deployment
      name: typhon
      namespace: climate
    - apiVersion: v1
      kind: Service
      name: typhon
      namespace: climate
  wait: false
  timeout: 20m
pytest.ini (new file, 3 lines)
@@ -0,0 +1,3 @@
[pytest]
addopts = -ra
norecursedirs = .git .venv .venv-ci __pycache__ tmp
File diff suppressed because it is too large
@@ -1,5 +1,7 @@
 import importlib.util
 import pathlib
+import sys
+import types

 import pytest

@@ -20,6 +22,26 @@ def load_sync_module(monkeypatch):
     }
     for k, v in env.items():
         monkeypatch.setenv(k, v)
+    fake_psycopg2 = types.ModuleType("psycopg2")
+    fake_psycopg2.Error = Exception
+    fake_psycopg2.connect = lambda **kwargs: None
+    fake_psycopg2_extras = types.ModuleType("psycopg2.extras")
+    fake_psycopg2_extras.RealDictCursor = object
+    fake_passlib = types.ModuleType("passlib")
+    fake_passlib_hash = types.ModuleType("passlib.hash")
+
+    class _FakeBcryptSha256:
+        @staticmethod
+        def hash(password):
+            return f"stub:{password}"
+
+    fake_passlib_hash.bcrypt_sha256 = _FakeBcryptSha256
+    fake_passlib.hash = fake_passlib_hash
+
+    monkeypatch.setitem(sys.modules, "psycopg2", fake_psycopg2)
+    monkeypatch.setitem(sys.modules, "psycopg2.extras", fake_psycopg2_extras)
+    monkeypatch.setitem(sys.modules, "passlib", fake_passlib)
+    monkeypatch.setitem(sys.modules, "passlib.hash", fake_passlib_hash)
     module_path = (
         pathlib.Path(__file__).resolve().parents[2]
         / "services"
@@ -15,7 +15,6 @@ resources:
   - frontend-service.yaml
   - backend-deployment.yaml
   - backend-service.yaml
-  - vaultwarden-cred-sync-cronjob.yaml
   - oneoffs/portal-onboarding-e2e-test-job.yaml
   - ingress.yaml
 images:
@@ -30,12 +29,6 @@ configMapGenerator:
       - gateway.py=scripts/gateway.py
     options:
       disableNameSuffixHash: true
-  - name: vaultwarden-cred-sync-script
-    namespace: bstein-dev-home
-    files:
-      - vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
-    options:
-      disableNameSuffixHash: true
   - name: portal-onboarding-e2e-tests
     namespace: bstein-dev-home
     files:
services/bstein-dev-home/scripts/vaultwarden_cred_sync.py (deleted, 245 lines)

@@ -1,245 +0,0 @@
#!/usr/bin/env python3

from __future__ import annotations

import os
import sys
import time
from datetime import datetime, timezone
from typing import Any, Iterable

import httpx

from atlas_portal import settings
from atlas_portal.keycloak import admin_client
from atlas_portal.vaultwarden import invite_user


VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800"))
VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2"))


def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured")

    url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    first = 0
    while True:
        headers = _headers_with_retry(client)
        # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
        # brief representation which may omit these.
        params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"}
        payload = None
        for attempt in range(1, 6):
            try:
                with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
                    resp = http.get(url, params=params, headers=headers)
                    resp.raise_for_status()
                    payload = resp.json()
                break
            except httpx.HTTPError as exc:
                if attempt == 5:
                    raise
                time.sleep(attempt * 2)

        if not isinstance(payload, list) or not payload:
            return

        for item in payload:
            if isinstance(item, dict):
                yield item

        if len(payload) < page_size:
            return
        first += page_size


def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]:
    last_exc: Exception | None = None
    for attempt in range(1, attempts + 1):
        try:
            return client.headers()
        except Exception as exc:
            last_exc = exc
            time.sleep(attempt * 2)
    if last_exc:
        raise last_exc
    raise RuntimeError("failed to fetch keycloak headers")


def _extract_attr(attrs: Any, key: str) -> str:
    if not isinstance(attrs, dict):
        return ""
    raw = attrs.get(key)
    if isinstance(raw, list):
        for item in raw:
            if isinstance(item, str) and item.strip():
                return item.strip()
        return ""
    if isinstance(raw, str) and raw.strip():
        return raw.strip()
    return ""


def _parse_synced_at(value: str) -> float | None:
    value = (value or "").strip()
    if not value:
        return None
    for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
        try:
            parsed = datetime.strptime(value, fmt)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)
            return parsed.timestamp()
        except ValueError:
            continue
    return None


def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
    username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
    username = username.strip()
    if not username:
        return ""

    attrs = user.get("attributes")
    vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR)
    if vaultwarden_email:
        return vaultwarden_email

    mailu_email = _extract_attr(attrs, "mailu_email")
    if mailu_email:
        return mailu_email

    email = (user.get("email") if isinstance(user.get("email"), str) else "") or ""
    email = email.strip()
    if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
        return email

    # Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
    # This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
    return ""


def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
    value = (value or "").strip()
    if not value:
        return
    existing = _extract_attr(user.get("attributes"), key)
    if existing:
        return
    admin_client().set_user_attribute(username, key, value)


def _set_user_attribute(username: str, key: str, value: str) -> None:
    value = (value or "").strip()
    if not value:
        return
    admin_client().set_user_attribute(username, key, value)


def main() -> int:
    processed = 0
    created = 0
    skipped = 0
    failures = 0
    consecutive_failures = 0

    for user in _iter_keycloak_users():
        username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
        username = username.strip()
        if not username:
            skipped += 1
            continue

        enabled = user.get("enabled")
        if enabled is False:
            skipped += 1
            continue

        if user.get("serviceAccountClientId") or username.startswith("service-account-"):
            skipped += 1
            continue

        # Fetch the full user payload so we can reliably read attributes (and skip re-invites).
        user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or ""
        user_id = user_id.strip()
        full_user = user
        if user_id:
            try:
                full_user = admin_client().get_user(user_id)
            except Exception:
                full_user = user

        current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
        current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
        current_synced_ts = _parse_synced_at(current_synced_at)
        if current_status in {"rate_limited", "error"} and current_synced_ts:
            if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC:
                skipped += 1
                continue
        email = _vaultwarden_email_for_user(full_user)
        if not email:
            print(f"skip {username}: missing email", file=sys.stderr)
            skipped += 1
            continue

        try:
            _set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
        except Exception:
            pass

        # If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
        # Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
        if current_status in {"invited", "already_present"}:
            if not current_synced_at:
                try:
                    _set_user_attribute(
                        username,
                        VAULTWARDEN_SYNCED_AT_ATTR,
                        time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
                    )
                except Exception:
                    pass
            skipped += 1
            continue

        processed += 1
        result = invite_user(email)
        if result.ok:
            created += 1
            consecutive_failures = 0
            print(f"ok {username}: {result.status}")
            try:
                _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
                _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
            except Exception:
                pass
        else:
            failures += 1
            if result.status in {"rate_limited", "error"}:
                consecutive_failures += 1
            print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
            try:
                _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
                _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
            except Exception:
                pass
            if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT:
                print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr)
                break

    print(
        f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
        file=sys.stderr,
    )
    return 0 if failures == 0 else 2


if __name__ == "__main__":
    raise SystemExit(main())
@@ -1,86 +0,0 @@
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vaultwarden-cred-sync
  namespace: bstein-dev-home
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/15 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "bstein-dev-home"
            vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
            vault.hashicorp.com/agent-inject-template-portal-env.sh: |
              {{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
              export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
              {{ end }}
              {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
              export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
              export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
              export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
              export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
              export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              {{ end }}
        spec:
          serviceAccountName: bstein-dev-home
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          imagePullSecrets:
            - name: harbor-regcred
          containers:
            - name: sync
              image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95
              imagePullPolicy: Always
              command: ["/bin/sh", "-c"]
              args:
                - >-
                  . /vault/secrets/portal-env.sh
                  && exec python /scripts/vaultwarden_cred_sync.py
              env:
                - name: PYTHONPATH
                  value: /app
                - name: KEYCLOAK_ENABLED
                  value: "true"
                - name: KEYCLOAK_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_URL
                  value: http://keycloak.sso.svc.cluster.local
                - name: KEYCLOAK_ADMIN_REALM
                  value: atlas
                - name: KEYCLOAK_ADMIN_CLIENT_ID
                  value: bstein-dev-home-admin
                - name: HTTP_CHECK_TIMEOUT_SEC
                  value: "20"
                - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
                  value: "900"
                - name: VAULTWARDEN_RETRY_COOLDOWN_SEC
                  value: "1800"
                - name: VAULTWARDEN_FAILURE_BAILOUT
                  value: "2"
              volumeMounts:
                - name: vaultwarden-cred-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: vaultwarden-cred-sync-script
              configMap:
                name: vaultwarden-cred-sync-script
                defaultMode: 0555
@@ -17,6 +17,7 @@ spec:
     spec:
       nodeSelector:
         hardware: rpi5
+        node-role.kubernetes.io/worker: "true"
      containers:
        - name: element-call
          image: ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc
@@ -1,471 +0,0 @@
# services/comms/guest-name-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: guest-name-randomizer
  namespace: comms
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/1 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "comms"
            vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
            vault.hashicorp.com/agent-inject-template-turn-secret: |
              {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
            vault.hashicorp.com/agent-inject-template-livekit-primary: |
              {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-bot-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-seeder-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-matrix: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-homepage: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
            vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
              {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
            vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
              {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
            vault.hashicorp.com/agent-inject-template-mas-db-pass: |
              {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
        spec:
          restartPolicy: Never
          serviceAccountName: comms-vault
          nodeSelector:
            hardware: rpi5
          volumes:
            - name: vault-scripts
              configMap:
                name: comms-vault-env
                defaultMode: 0555
          containers:
            - name: rename
              image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0
              volumeMounts:
                - name: vault-scripts
                  mountPath: /vault/scripts
                  readOnly: true
              env:
                - name: SYNAPSE_BASE
                  value: http://othrys-synapse-matrix-synapse:8008
                - name: MAS_ADMIN_CLIENT_ID
                  value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM
                - name: MAS_ADMIN_CLIENT_SECRET_FILE
                  value: /vault/secrets/mas-admin-secret
                - name: MAS_ADMIN_API_BASE
                  value: http://matrix-authentication-service:8081/api/admin/v1
                - name: MAS_TOKEN_URL
                  value: http://matrix-authentication-service:8080/oauth2/token
                - name: SEEDER_USER
                  value: othrys-seeder
                - name: PGHOST
                  value: postgres-service.postgres.svc.cluster.local
                - name: PGPORT
                  value: "5432"
                - name: PGDATABASE
                  value: synapse
                - name: PGUSER
                  value: synapse
              command:
                - /bin/sh
                - -c
                - |
                  set -euo pipefail
                  . /vault/scripts/comms_vault_env.sh
                  python - <<'PY'
                  import base64
                  import os
                  import random
                  import requests
                  import time
                  import urllib.parse
                  import psycopg2

                  ADJ = [
                      "brisk","calm","eager","gentle","merry","nifty","rapid","sunny","witty","zesty",
                      "amber","bold","bright","crisp","daring","frosty","glad","jolly","lively","mellow",
                      "quiet","ripe","serene","spry","tidy","vivid","warm","wild","clever","kind",
                  ]
                  NOUN = [
                      "otter","falcon","comet","ember","grove","harbor","meadow","raven","river","summit",
                      "breeze","cedar","cinder","cove","delta","forest","glade","lark","marsh","peak",
                      "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr",
                  ]

                  BASE = os.environ["SYNAPSE_BASE"]
                  MAS_ADMIN_CLIENT_ID = os.environ["MAS_ADMIN_CLIENT_ID"]
                  MAS_ADMIN_CLIENT_SECRET_FILE = os.environ["MAS_ADMIN_CLIENT_SECRET_FILE"]
                  MAS_ADMIN_API_BASE = os.environ["MAS_ADMIN_API_BASE"].rstrip("/")
                  MAS_TOKEN_URL = os.environ["MAS_TOKEN_URL"]
                  SEEDER_USER = os.environ["SEEDER_USER"]
                  ROOM_ALIAS = "#othrys:live.bstein.dev"
                  SERVER_NAME = "live.bstein.dev"
                  STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000

                  def mas_admin_token():
                      with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f:
                          secret = f.read().strip()
                      basic = base64.b64encode(f"{MAS_ADMIN_CLIENT_ID}:{secret}".encode()).decode()
                      last_err = None
                      for attempt in range(5):
                          try:
                              r = requests.post(
                                  MAS_TOKEN_URL,
                                  headers={"Authorization": f"Basic {basic}"},
                                  data={"grant_type": "client_credentials", "scope": "urn:mas:admin"},
                                  timeout=30,
                              )
                              r.raise_for_status()
                              return r.json()["access_token"]
                          except Exception as exc:  # noqa: BLE001
                              last_err = exc
                              time.sleep(2 ** attempt)
                      raise last_err

                  def mas_user_id(token, username):
                      r = requests.get(
                          f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}",
                          headers={"Authorization": f"Bearer {token}"},
                          timeout=30,
                      )
                      r.raise_for_status()
                      return r.json()["data"]["id"]

                  def mas_personal_session(token, user_id):
                      r = requests.post(
                          f"{MAS_ADMIN_API_BASE}/personal-sessions",
                          headers={"Authorization": f"Bearer {token}"},
                          json={
                              "actor_user_id": user_id,
                              "human_name": "guest-name-randomizer",
                              "scope": "urn:matrix:client:api:*",
                              "expires_in": 300,
                          },
                          timeout=30,
                      )
                      r.raise_for_status()
                      data = r.json().get("data", {}).get("attributes", {}) or {}
                      return data["access_token"], r.json()["data"]["id"]

                  def mas_revoke_session(token, session_id):
|
||||
requests.post(
|
||||
f"{MAS_ADMIN_API_BASE}/personal-sessions/{urllib.parse.quote(session_id)}/revoke",
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
json={},
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
def resolve_alias(token, alias):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
enc = urllib.parse.quote(alias)
|
||||
r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=headers)
|
||||
r.raise_for_status()
|
||||
return r.json()["room_id"]
|
||||
|
||||
def room_members(token, room_id):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
r = requests.get(f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members", headers=headers)
|
||||
r.raise_for_status()
|
||||
members = set()
|
||||
existing_names = set()
|
||||
for ev in r.json().get("chunk", []):
|
||||
user_id = ev.get("state_key")
|
||||
if user_id:
|
||||
members.add(user_id)
|
||||
disp = (ev.get("content") or {}).get("displayname")
|
||||
if disp:
|
||||
existing_names.add(disp)
|
||||
return members, existing_names
|
||||
|
||||
def mas_list_users(token):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
users = []
|
||||
cursor = None
|
||||
while True:
|
||||
url = f"{MAS_ADMIN_API_BASE}/users?page[size]=100"
|
||||
if cursor:
|
||||
url += f"&page[after]={urllib.parse.quote(cursor)}"
|
||||
r = requests.get(url, headers=headers, timeout=30)
|
||||
r.raise_for_status()
|
||||
data = r.json().get("data", [])
|
||||
if not data:
|
||||
break
|
||||
users.extend(data)
|
||||
cursor = data[-1].get("meta", {}).get("page", {}).get("cursor")
|
||||
if not cursor:
|
||||
break
|
||||
return users
|
||||
|
||||
def synapse_list_users(token):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
users = []
|
||||
from_token = None
|
||||
while True:
|
||||
url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
|
||||
if from_token:
|
||||
url += f"&from={urllib.parse.quote(from_token)}"
|
||||
r = requests.get(url, headers=headers, timeout=30)
|
||||
r.raise_for_status()
|
||||
payload = r.json()
|
||||
users.extend(payload.get("users", []))
|
||||
from_token = payload.get("next_token")
|
||||
if not from_token:
|
||||
break
|
||||
return users
|
||||
|
||||
def should_prune_guest(entry, now_ms):
|
||||
if not entry.get("is_guest"):
|
||||
return False
|
||||
last_seen = entry.get("last_seen_ts")
|
||||
if last_seen is None:
|
||||
return False
|
||||
try:
|
||||
last_seen = int(last_seen)
|
||||
except (TypeError, ValueError):
|
||||
return False
|
||||
return now_ms - last_seen > STALE_GUEST_MS
|
||||
|
||||
def prune_guest(token, user_id):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
try:
|
||||
r = requests.delete(
|
||||
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=headers,
|
||||
params={"erase": "true"},
|
||||
timeout=30,
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
print(f"guest prune failed for {user_id}: {exc}")
|
||||
return False
|
||||
if r.status_code in (200, 202, 204, 404):
|
||||
return True
|
||||
print(f"guest prune failed for {user_id}: {r.status_code} {r.text}")
|
||||
return False
|
||||
|
||||
def user_id_for_username(username):
|
||||
return f"@{username}:live.bstein.dev"
|
||||
|
||||
def get_displayname(token, user_id):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
r = requests.get(f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}", headers=headers)
|
||||
r.raise_for_status()
|
||||
return r.json().get("displayname")
|
||||
|
||||
def get_displayname_admin(token, user_id):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
r = requests.get(
|
||||
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=headers,
|
||||
timeout=30,
|
||||
)
|
||||
if r.status_code == 404:
|
||||
return None
|
||||
r.raise_for_status()
|
||||
return r.json().get("displayname")
|
||||
|
||||
def set_displayname(token, room_id, user_id, name, in_room):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
payload = {"displayname": name}
|
||||
r = requests.put(
|
||||
f"{BASE}/_matrix/client/v3/profile/{urllib.parse.quote(user_id)}/displayname",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
)
|
||||
r.raise_for_status()
|
||||
if not in_room:
|
||||
return
|
||||
state_url = f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.member/{urllib.parse.quote(user_id)}"
|
||||
content = {"membership": "join", "displayname": name}
|
||||
requests.put(state_url, headers=headers, json=content, timeout=30)
|
||||
|
||||
def set_displayname_admin(token, user_id, name):
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
payload = {"displayname": name}
|
||||
r = requests.put(
|
||||
f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}",
|
||||
headers=headers,
|
||||
json=payload,
|
||||
timeout=30,
|
||||
)
|
||||
if r.status_code in (200, 201, 204):
|
||||
return True
|
||||
return False
|
||||
|
||||
def needs_rename_username(username):
|
||||
return username.isdigit() or username.startswith("guest-")
|
||||
|
||||
def needs_rename_display(display):
|
||||
return not display or display.isdigit() or display.startswith("guest-")
|
||||
|
||||
def db_rename_numeric(existing_names):
|
||||
profile_rows = []
|
||||
profile_index = {}
|
||||
users = []
|
||||
conn = psycopg2.connect(
|
||||
host=os.environ["PGHOST"],
|
||||
port=int(os.environ["PGPORT"]),
|
||||
dbname=os.environ["PGDATABASE"],
|
||||
user=os.environ["PGUSER"],
|
||||
password=os.environ["PGPASSWORD"],
|
||||
)
|
||||
try:
|
||||
with conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id, displayname FROM profiles WHERE full_user_id ~ %s",
|
||||
(f"^@\\d+:{SERVER_NAME}$",),
|
||||
)
|
||||
profile_rows = cur.fetchall()
|
||||
profile_index = {row[1]: row for row in profile_rows}
|
||||
for user_id, full_user_id, display in profile_rows:
|
||||
if display and not needs_rename_display(display):
|
||||
continue
|
||||
new = None
|
||||
for _ in range(30):
|
||||
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
|
||||
if candidate not in existing_names:
|
||||
new = candidate
|
||||
existing_names.add(candidate)
|
||||
break
|
||||
if not new:
|
||||
continue
|
||||
cur.execute(
|
||||
"UPDATE profiles SET displayname = %s WHERE full_user_id = %s",
|
||||
(new, full_user_id),
|
||||
)
|
||||
|
||||
cur.execute(
|
||||
"SELECT name FROM users WHERE name ~ %s",
|
||||
(f"^@\\d+:{SERVER_NAME}$",),
|
||||
)
|
||||
users = [row[0] for row in cur.fetchall()]
|
||||
if not users:
|
||||
return
|
||||
cur.execute(
|
||||
"SELECT user_id, full_user_id FROM profiles WHERE full_user_id = ANY(%s)",
|
||||
(users,),
|
||||
)
|
||||
for existing_full in cur.fetchall():
|
||||
profile_index.setdefault(existing_full[1], existing_full)
|
||||
|
||||
for full_user_id in users:
|
||||
if full_user_id in profile_index:
|
||||
continue
|
||||
localpart = full_user_id.split(":", 1)[0].lstrip("@")
|
||||
new = None
|
||||
for _ in range(30):
|
||||
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
|
||||
if candidate not in existing_names:
|
||||
new = candidate
|
||||
existing_names.add(candidate)
|
||||
break
|
||||
if not new:
|
||||
continue
|
||||
cur.execute(
|
||||
"INSERT INTO profiles (user_id, displayname, full_user_id) VALUES (%s, %s, %s) "
|
||||
"ON CONFLICT (full_user_id) DO UPDATE SET displayname = EXCLUDED.displayname",
|
||||
(localpart, new, full_user_id),
|
||||
)
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
admin_token = mas_admin_token()
|
||||
seeder_id = mas_user_id(admin_token, SEEDER_USER)
|
||||
seeder_token, seeder_session = mas_personal_session(admin_token, seeder_id)
|
||||
try:
|
||||
room_id = resolve_alias(seeder_token, ROOM_ALIAS)
|
||||
members, existing = room_members(seeder_token, room_id)
|
||||
users = mas_list_users(admin_token)
|
||||
mas_usernames = set()
|
||||
for user in users:
|
||||
attrs = user.get("attributes") or {}
|
||||
username = attrs.get("username") or ""
|
||||
if username:
|
||||
mas_usernames.add(username)
|
||||
legacy_guest = attrs.get("legacy_guest")
|
||||
if not username:
|
||||
continue
|
||||
if not (legacy_guest or needs_rename_username(username)):
|
||||
continue
|
||||
user_id = user_id_for_username(username)
|
||||
access_token, session_id = mas_personal_session(admin_token, user["id"])
|
||||
try:
|
||||
display = get_displayname(access_token, user_id)
|
||||
if display and not needs_rename_display(display):
|
||||
continue
|
||||
new = None
|
||||
for _ in range(30):
|
||||
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
|
||||
if candidate not in existing:
|
||||
new = candidate
|
||||
existing.add(candidate)
|
||||
break
|
||||
if not new:
|
||||
continue
|
||||
set_displayname(access_token, room_id, user_id, new, user_id in members)
|
||||
finally:
|
||||
mas_revoke_session(admin_token, session_id)
|
||||
|
||||
try:
|
||||
entries = synapse_list_users(seeder_token)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
print(f"synapse admin list skipped: {exc}")
|
||||
entries = []
|
||||
now_ms = int(time.time() * 1000)
|
||||
for entry in entries:
|
||||
user_id = entry.get("name") or ""
|
||||
if not user_id.startswith("@"):
|
||||
continue
|
||||
localpart = user_id.split(":", 1)[0].lstrip("@")
|
||||
if localpart in mas_usernames:
|
||||
continue
|
||||
is_guest = entry.get("is_guest")
|
||||
if is_guest and should_prune_guest(entry, now_ms):
|
||||
if prune_guest(seeder_token, user_id):
|
||||
continue
|
||||
if not (is_guest or needs_rename_username(localpart)):
|
||||
continue
|
||||
display = get_displayname_admin(seeder_token, user_id)
|
||||
if display and not needs_rename_display(display):
|
||||
continue
|
||||
new = None
|
||||
for _ in range(30):
|
||||
candidate = f"{random.choice(ADJ)}-{random.choice(NOUN)}"
|
||||
if candidate not in existing:
|
||||
new = candidate
|
||||
existing.add(candidate)
|
||||
break
|
||||
if not new:
|
||||
continue
|
||||
if not set_displayname_admin(seeder_token, user_id, new):
|
||||
continue
|
||||
db_rename_numeric(existing)
|
||||
finally:
|
||||
mas_revoke_session(admin_token, seeder_session)
|
||||
PY
|
||||
@ -119,6 +119,7 @@ spec:
                  > /synapse/config/conf.d/runtime-secrets.yaml
      nodeSelector:
        hardware: rpi5
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
@ -417,6 +418,7 @@ spec:

      nodeSelector:
        hardware: rpi5
        node-role.kubernetes.io/worker: "true"

      affinity:
        nodeAffinity:

@ -34,11 +34,7 @@ resources:
  - livekit-token-deployment.yaml
  - livekit.yaml
  - coturn.yaml
  - seed-othrys-room.yaml
  - guest-name-job.yaml
  - oneoffs/othrys-kick-numeric-job.yaml
  - pin-othrys-job.yaml
  - reset-othrys-room-job.yaml
  - oneoffs/bstein-force-leave-job.yaml
  - livekit-ingress.yaml
  - livekit-middlewares.yaml

@ -1,169 +0,0 @@
# services/comms/pin-othrys-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pin-othrys-invite
  namespace: comms
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/30 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "comms"
            vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
            vault.hashicorp.com/agent-inject-template-turn-secret: |
              {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
            vault.hashicorp.com/agent-inject-template-livekit-primary: |
              {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-bot-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-seeder-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-matrix: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-homepage: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
            vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
              {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
            vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
              {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
            vault.hashicorp.com/agent-inject-template-mas-db-pass: |
              {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
        spec:
          restartPolicy: Never
          serviceAccountName: comms-vault
          containers:
            - name: pin
              image: python:3.11-slim
              env:
                - name: SYNAPSE_BASE
                  value: http://othrys-synapse-matrix-synapse:8008
                - name: AUTH_BASE
                  value: http://matrix-authentication-service:8080
                - name: SEEDER_USER
                  value: othrys-seeder
              command:
                - /bin/sh
                - -c
                - |
                  set -euo pipefail
                  . /vault/scripts/comms_vault_env.sh
                  pip install --no-cache-dir requests >/dev/null
                  python - <<'PY'
                  import os, requests, urllib.parse

                  BASE = os.environ["SYNAPSE_BASE"]
                  AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
                  ROOM_ALIAS = "#othrys:live.bstein.dev"
                  MESSAGE = (
                      "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join "
                      "and choose 'Continue' -> 'Join as guest'."
                  )

                  def auth(token): return {"Authorization": f"Bearer {token}"}

                  def canon_user(user):
                      u = (user or "").strip()
                      if u.startswith("@") and ":" in u:
                          return u
                      u = u.lstrip("@")
                      if ":" in u:
                          return f"@{u}"
                      return f"@{u}:live.bstein.dev"

                  def login(user, password):
                      r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
                          "type": "m.login.password",
                          "identifier": {"type": "m.id.user", "user": canon_user(user)},
                          "password": password,
                      })
                      r.raise_for_status()
                      return r.json()["access_token"]

                  def resolve(alias, token):
                      enc = urllib.parse.quote(alias)
                      r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
                      r.raise_for_status()
                      return r.json()["room_id"]

                  def get_pinned(room_id, token):
                      r = requests.get(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
                          headers=auth(token),
                      )
                      if r.status_code == 404:
                          return []
                      r.raise_for_status()
                      return r.json().get("pinned", [])

                  def get_event(room_id, event_id, token):
                      r = requests.get(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/event/{urllib.parse.quote(event_id)}",
                          headers=auth(token),
                      )
                      if r.status_code == 404:
                          return None
                      r.raise_for_status()
                      return r.json()

                  def send(room_id, token, body):
                      r = requests.post(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
                          headers=auth(token),
                          json={"msgtype": "m.text", "body": body},
                      )
                      r.raise_for_status()
                      return r.json()["event_id"]

                  def pin(room_id, token, event_id):
                      r = requests.put(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/m.room.pinned_events",
                          headers=auth(token),
                          json={"pinned": [event_id]},
                      )
                      r.raise_for_status()

                  token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
                  room_id = resolve(ROOM_ALIAS, token)
                  for event_id in get_pinned(room_id, token):
                      ev = get_event(room_id, event_id, token)
                      if ev and ev.get("content", {}).get("body") == MESSAGE:
                          raise SystemExit(0)

                  eid = send(room_id, token, MESSAGE)
                  pin(room_id, token, eid)
                  PY
              volumeMounts:
                - name: vault-scripts
                  mountPath: /vault/scripts
                  readOnly: true
          volumes:
            - name: vault-scripts
              configMap:
                name: comms-vault-env
                defaultMode: 0555
@ -1,312 +0,0 @@
# services/comms/reset-othrys-room-job.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: othrys-room-reset
  namespace: comms
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "0 0 1 1 *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "comms"
            vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
            vault.hashicorp.com/agent-inject-template-turn-secret: |
              {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
            vault.hashicorp.com/agent-inject-template-livekit-primary: |
              {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-bot-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-seeder-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-matrix: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-homepage: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
            vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
              {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
            vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
              {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
            vault.hashicorp.com/agent-inject-template-mas-db-pass: |
              {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
        spec:
          restartPolicy: Never
          serviceAccountName: comms-vault
          containers:
            - name: reset
              image: python:3.11-slim
              env:
                - name: SYNAPSE_BASE
                  value: http://othrys-synapse-matrix-synapse:8008
                - name: AUTH_BASE
                  value: http://matrix-authentication-service:8080
                - name: SERVER_NAME
                  value: live.bstein.dev
                - name: ROOM_ALIAS
                  value: "#othrys:live.bstein.dev"
                - name: ROOM_NAME
                  value: Othrys
                - name: PIN_MESSAGE
                  value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'."
                - name: SEEDER_USER
                  value: othrys-seeder
                - name: BOT_USER
                  value: atlasbot
              command:
                - /bin/sh
                - -c
                - |
                  set -euo pipefail
                  . /vault/scripts/comms_vault_env.sh
                  pip install --no-cache-dir requests >/dev/null
                  python - <<'PY'
                  import os
                  import time
                  import urllib.parse
                  import requests

                  BASE = os.environ["SYNAPSE_BASE"]
                  AUTH_BASE = os.environ.get("AUTH_BASE", BASE)
                  SERVER_NAME = os.environ.get("SERVER_NAME", "live.bstein.dev")
                  ROOM_ALIAS = os.environ.get("ROOM_ALIAS", "#othrys:live.bstein.dev")
                  ROOM_NAME = os.environ.get("ROOM_NAME", "Othrys")
                  PIN_MESSAGE = os.environ["PIN_MESSAGE"]
                  SEEDER_USER = os.environ["SEEDER_USER"]
                  SEEDER_PASS = os.environ["SEEDER_PASS"]
                  BOT_USER = os.environ["BOT_USER"]

                  POWER_LEVELS = {
                      "ban": 50,
                      "events": {
                          "m.room.avatar": 50,
                          "m.room.canonical_alias": 50,
                          "m.room.encryption": 100,
                          "m.room.history_visibility": 100,
                          "m.room.name": 50,
                          "m.room.power_levels": 100,
                          "m.room.server_acl": 100,
                          "m.room.tombstone": 100,
                      },
                      "events_default": 0,
                      "historical": 100,
                      "invite": 50,
                      "kick": 50,
                      "m.call.invite": 50,
                      "redact": 50,
                      "state_default": 50,
                      "users": {f"@{SEEDER_USER}:{SERVER_NAME}": 100},
                      "users_default": 0,
                  }

                  def auth(token):
                      return {"Authorization": f"Bearer {token}"}

                  def canon_user(user):
                      u = (user or "").strip()
                      if u.startswith("@") and ":" in u:
                          return u
                      u = u.lstrip("@")
                      if ":" in u:
                          return f"@{u}"
                      return f"@{u}:{SERVER_NAME}"

                  def login(user, password):
                      r = requests.post(
                          f"{AUTH_BASE}/_matrix/client/v3/login",
                          json={
                              "type": "m.login.password",
                              "identifier": {"type": "m.id.user", "user": canon_user(user)},
                              "password": password,
                          },
                      )
                      if r.status_code != 200:
                          raise SystemExit(f"login failed: {r.status_code} {r.text}")
                      return r.json()["access_token"]

                  def resolve_alias(token, alias):
                      enc = urllib.parse.quote(alias)
                      r = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
                      if r.status_code == 404:
                          return None
                      r.raise_for_status()
                      return r.json()["room_id"]

                  def create_room(token):
                      r = requests.post(
                          f"{BASE}/_matrix/client/v3/createRoom",
                          headers=auth(token),
                          json={
                              "preset": "public_chat",
                              "name": ROOM_NAME,
                              "room_version": "11",
                          },
                      )
                      r.raise_for_status()
                      return r.json()["room_id"]

                  def put_state(token, room_id, ev_type, content):
                      r = requests.put(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/state/{ev_type}",
                          headers=auth(token),
                          json=content,
                      )
                      r.raise_for_status()

                  def set_directory_visibility(token, room_id, visibility):
                      r = requests.put(
                          f"{BASE}/_matrix/client/v3/directory/list/room/{urllib.parse.quote(room_id)}",
                          headers=auth(token),
                          json={"visibility": visibility},
                      )
                      r.raise_for_status()

                  def delete_alias(token, alias):
                      enc = urllib.parse.quote(alias)
                      r = requests.delete(f"{BASE}/_matrix/client/v3/directory/room/{enc}", headers=auth(token))
                      if r.status_code in (200, 202, 404):
                          return
                      r.raise_for_status()

                  def put_alias(token, alias, room_id):
                      enc = urllib.parse.quote(alias)
                      r = requests.put(
                          f"{BASE}/_matrix/client/v3/directory/room/{enc}",
                          headers=auth(token),
                          json={"room_id": room_id},
                      )
                      r.raise_for_status()

                  def list_joined_members(token, room_id):
                      r = requests.get(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/members?membership=join",
                          headers=auth(token),
                      )
                      r.raise_for_status()
                      members = []
                      for ev in r.json().get("chunk", []):
                          if ev.get("type") != "m.room.member":
                              continue
                          uid = ev.get("state_key")
                          if not isinstance(uid, str) or not uid.startswith("@"):
                              continue
                          members.append(uid)
                      return members

                  def invite_user(token, room_id, user_id):
                      r = requests.post(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/invite",
                          headers=auth(token),
                          json={"user_id": user_id},
                      )
                      if r.status_code in (200, 202):
                          return
                      r.raise_for_status()

                  def send_message(token, room_id, body):
                      r = requests.post(
                          f"{BASE}/_matrix/client/v3/rooms/{urllib.parse.quote(room_id)}/send/m.room.message",
                          headers=auth(token),
                          json={"msgtype": "m.text", "body": body},
                      )
                      r.raise_for_status()
                      return r.json()["event_id"]

                  def login_with_retry():
                      last = None
                      for attempt in range(1, 6):
                          try:
                              return login(SEEDER_USER, SEEDER_PASS)
                          except Exception as exc:  # noqa: BLE001
                              last = exc
                              time.sleep(attempt * 2)
                      raise last

                  token = login_with_retry()

                  old_room_id = resolve_alias(token, ROOM_ALIAS)
                  if not old_room_id:
                      raise SystemExit(f"alias {ROOM_ALIAS} not found; refusing to proceed")

                  new_room_id = create_room(token)

                  # Configure the new room.
                  put_state(token, new_room_id, "m.room.join_rules", {"join_rule": "public"})
                  put_state(token, new_room_id, "m.room.guest_access", {"guest_access": "can_join"})
                  put_state(token, new_room_id, "m.room.history_visibility", {"history_visibility": "shared"})
                  put_state(token, new_room_id, "m.room.power_levels", POWER_LEVELS)

                  # Move the alias.
                  delete_alias(token, ROOM_ALIAS)
                  put_alias(token, ROOM_ALIAS, new_room_id)
                  put_state(token, new_room_id, "m.room.canonical_alias", {"alias": ROOM_ALIAS})

                  set_directory_visibility(token, new_room_id, "public")

                  # Invite the bot and all joined members of the old room.
                  bot_user_id = f"@{BOT_USER}:{SERVER_NAME}"
                  invite_user(token, new_room_id, bot_user_id)
                  for uid in list_joined_members(token, old_room_id):
                      if uid == f"@{SEEDER_USER}:{SERVER_NAME}":
                          continue
                      localpart = uid.split(":", 1)[0].lstrip("@")
                      if localpart.isdigit():
                          continue
                      invite_user(token, new_room_id, uid)

                  # Pin the guest invite message in the new room.
                  event_id = send_message(token, new_room_id, PIN_MESSAGE)
                  put_state(token, new_room_id, "m.room.pinned_events", {"pinned": [event_id]})

                  # De-list and tombstone the old room.
                  set_directory_visibility(token, old_room_id, "private")
                  put_state(token, old_room_id, "m.room.join_rules", {"join_rule": "invite"})
                  put_state(token, old_room_id, "m.room.guest_access", {"guest_access": "forbidden"})
                  put_state(
                      token,
                      old_room_id,
                      "m.room.tombstone",
                      {"body": "Othrys has been reset. Please join the new room.", "replacement_room": new_room_id},
                  )
                  send_message(
                      token,
                      old_room_id,
                      "Othrys was reset. Join the new room at https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join",
                  )

                  print(f"old_room_id={old_room_id}")
                  print(f"new_room_id={new_room_id}")
                  PY
              volumeMounts:
                - name: vault-scripts
                  mountPath: /vault/scripts
                  readOnly: true
          volumes:
            - name: vault-scripts
              configMap:
                name: comms-vault-env
                defaultMode: 0555
@ -1,185 +0,0 @@
# services/comms/seed-othrys-room.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: seed-othrys-room
  namespace: comms
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/10 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "comms"
            vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret"
            vault.hashicorp.com/agent-inject-template-turn-secret: |
              {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api"
            vault.hashicorp.com/agent-inject-template-livekit-primary: |
              {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-bot-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime"
            vault.hashicorp.com/agent-inject-template-seeder-pass: |
              {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-matrix: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime"
            vault.hashicorp.com/agent-inject-template-chat-homepage: |
              {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime"
            vault.hashicorp.com/agent-inject-template-mas-admin-secret: |
              {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db"
            vault.hashicorp.com/agent-inject-template-synapse-db-pass: |
              {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db"
            vault.hashicorp.com/agent-inject-template-mas-db-pass: |
              {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-matrix-shared: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime"
            vault.hashicorp.com/agent-inject-template-mas-kc-secret: |
              {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}}
        spec:
          restartPolicy: Never
          serviceAccountName: comms-vault
          containers:
            - name: seed
              image: python:3.11-slim
              env:
                - name: SYNAPSE_BASE
                  value: http://othrys-synapse-matrix-synapse:8008
                - name: AUTH_BASE
                  value: http://matrix-authentication-service:8080
                - name: SEEDER_USER
                  value: othrys-seeder
                - name: BOT_USER
                  value: atlasbot
              command:
                - /bin/sh
                - -c
                - |
                  set -euo pipefail
                  . /vault/scripts/comms_vault_env.sh
                  pip install --no-cache-dir requests pyyaml >/dev/null
                  python - <<'PY'
                  import os, requests, urllib.parse

                  BASE = os.environ["SYNAPSE_BASE"]
                  AUTH_BASE = os.environ.get("AUTH_BASE", BASE)

                  def canon_user(user):
                      u = (user or "").strip()
                      if u.startswith("@") and ":" in u:
                          return u
                      u = u.lstrip("@")
                      if ":" in u:
                          return f"@{u}"
                      return f"@{u}:live.bstein.dev"

                  def login(user, password):
                      r = requests.post(f"{AUTH_BASE}/_matrix/client/v3/login", json={
                          "type": "m.login.password",
                          "identifier": {"type": "m.id.user", "user": canon_user(user)},
                          "password": password,
                      })
                      if r.status_code != 200:
                          raise SystemExit(f"login failed: {r.status_code} {r.text}")
                      return r.json()["access_token"]

                  def ensure_user(token, localpart, password, admin):
                      headers = {"Authorization": f"Bearer {token}"}
                      user_id = f"@{localpart}:live.bstein.dev"
                      url = f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}"
                      res = requests.get(url, headers=headers)
                      if res.status_code == 200:
                          return
                      payload = {"password": password, "admin": admin, "deactivated": False}
                      create = requests.put(url, headers=headers, json=payload)
                      if create.status_code not in (200, 201):
                          raise SystemExit(f"create user {user_id} failed: {create.status_code} {create.text}")

                  def ensure_room(token):
                      headers = {"Authorization": f"Bearer {token}"}
                      alias = "#othrys:live.bstein.dev"
                      alias_enc = "%23othrys%3Alive.bstein.dev"
                      exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
                      if exists.status_code == 200:
                          room_id = exists.json()["room_id"]
                      else:
                          create = requests.post(f"{BASE}/_matrix/client/v3/createRoom", headers=headers, json={
                              "preset": "public_chat",
                              "name": "Othrys",
                              "room_alias_name": "othrys",
                              "initial_state": [],
                              "power_level_content_override": {"events_default": 0, "users_default": 0, "state_default": 50},
                          })
                          if create.status_code not in (200, 409):
                              raise SystemExit(f"create room failed: {create.status_code} {create.text}")
                          exists = requests.get(f"{BASE}/_matrix/client/v3/directory/room/{alias_enc}", headers=headers)
                          room_id = exists.json()["room_id"]
                      state_events = [
                          ("m.room.join_rules", {"join_rule": "public"}),
                          ("m.room.guest_access", {"guest_access": "can_join"}),
                          ("m.room.history_visibility", {"history_visibility": "shared"}),
                          ("m.room.canonical_alias", {"alias": alias}),
                      ]
                      for ev_type, content in state_events:
                          requests.put(f"{BASE}/_matrix/client/v3/rooms/{room_id}/state/{ev_type}", headers=headers, json=content)
                      requests.put(f"{BASE}/_matrix/client/v3/directory/list/room/{room_id}", headers=headers, json={"visibility": "public"})
                      return room_id

                  def join_user(token, room_id, user_id):
                      headers = {"Authorization": f"Bearer {token}"}
                      requests.post(f"{BASE}/_synapse/admin/v1/join/{urllib.parse.quote(room_id)}", headers=headers, json={"user_id": user_id})

                  def join_all_locals(token, room_id):
                      headers = {"Authorization": f"Bearer {token}"}
                      users = []
                      from_token = None
                      while True:
                          url = f"{BASE}/_synapse/admin/v2/users?local=true&deactivated=false&limit=100"
                          if from_token:
                              url += f"&from={from_token}"
                          res = requests.get(url, headers=headers).json()
                          users.extend([u["name"] for u in res.get("users", [])])
                          from_token = res.get("next_token")
                          if not from_token:
                              break
                      for uid in users:
                          join_user(token, room_id, uid)

                  token = login(os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"])
                  ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"], admin=True)
                  ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"], admin=False)
                  room_id = ensure_room(token)
                  join_user(token, room_id, f"@{os.environ['BOT_USER']}:live.bstein.dev")
                  join_all_locals(token, room_id)
                  PY
              volumeMounts:
                - name: synapse-config
                  mountPath: /config
                  readOnly: true
                - name: vault-scripts
                  mountPath: /vault/scripts
                  readOnly: true
          volumes:
            - name: synapse-config
              secret:
                secretName: othrys-synapse-matrix-synapse
            - name: vault-scripts
              configMap:
                name: comms-vault-env
                defaultMode: 0555
@ -1,56 +0,0 @@
# services/finance/firefly-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: firefly-cron
  namespace: finance
spec:
  schedule: "0 3 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "finance"
            vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets"
            vault.hashicorp.com/agent-inject-template-firefly-cron-token: |
              {{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
              {{ .Data.data.STATIC_CRON_TOKEN }}
              {{- end -}}
        spec:
          serviceAccountName: finance-vault
          restartPolicy: Never
          affinity:
            nodeAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi5"]
                - weight: 70
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi4"]
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: cron
              image: curlimages/curl:8.5.0
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -eu
                  token="$(cat /vault/secrets/firefly-cron-token)"
                  curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}"
@ -1,92 +0,0 @@
# services/finance/firefly-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: firefly-user-sync
  namespace: finance
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "0 6 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "finance"
            vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db"
            vault.hashicorp.com/agent-inject-template-firefly-env.sh: |
              {{ with secret "kv/data/atlas/finance/firefly-db" }}
              export DB_CONNECTION="pgsql"
              export DB_HOST="{{ .Data.data.DB_HOST }}"
              export DB_PORT="{{ .Data.data.DB_PORT }}"
              export DB_DATABASE="{{ .Data.data.DB_DATABASE }}"
              export DB_USERNAME="{{ .Data.data.DB_USERNAME }}"
              export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)"
              {{ end }}
              {{ with secret "kv/data/atlas/finance/firefly-secrets" }}
              export APP_KEY="$(cat /vault/secrets/firefly-app-key)"
              {{ end }}
            vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db"
            vault.hashicorp.com/agent-inject-template-firefly-db-password: |
              {{- with secret "kv/data/atlas/finance/firefly-db" -}}
              {{ .Data.data.DB_PASSWORD }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets"
            vault.hashicorp.com/agent-inject-template-firefly-app-key: |
              {{- with secret "kv/data/atlas/finance/firefly-secrets" -}}
              {{ .Data.data.APP_KEY }}
              {{- end -}}
        spec:
          serviceAccountName: finance-vault
          restartPolicy: Never
          affinity:
            nodeAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi5"]
                - weight: 70
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi4"]
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: sync
              image: fireflyiii/core:version-6.4.15
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -eu
                  . /vault/secrets/firefly-env.sh
                  exec php /scripts/firefly_user_sync.php
              env:
                - name: APP_ENV
                  value: production
                - name: APP_DEBUG
                  value: "false"
                - name: TZ
                  value: Etc/UTC
              volumeMounts:
                - name: firefly-user-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: firefly-user-sync-script
              configMap:
                name: firefly-user-sync-script
                defaultMode: 0555
@ -12,8 +12,6 @@ resources:
  - oneoffs/finance-secrets-ensure-job.yaml
  - actual-budget-deployment.yaml
  - firefly-deployment.yaml
  - firefly-user-sync-cronjob.yaml
  - firefly-cronjob.yaml
  - actual-budget-service.yaml
  - firefly-service.yaml
  - actual-budget-ingress.yaml
@ -24,9 +22,6 @@ configMapGenerator:
  - name: actual-openid-bootstrap-script
    files:
      - actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs
  - name: firefly-user-sync-script
    files:
      - firefly_user_sync.php=scripts/firefly_user_sync.php
  - name: finance-secrets-ensure-script
    files:
      - finance_secrets_ensure.py=scripts/finance_secrets_ensure.py

@ -1,114 +0,0 @@
#!/usr/bin/env php
<?php

declare(strict_types=1);

use FireflyIII\Console\Commands\Correction\CreatesGroupMemberships;
use FireflyIII\Models\Role;
use FireflyIII\Repositories\User\UserRepositoryInterface;
use FireflyIII\Support\Facades\FireflyConfig;
use FireflyIII\User;
use Illuminate\Contracts\Console\Kernel as ConsoleKernel;

function log_line(string $message): void
{
    fwrite(STDOUT, $message . PHP_EOL);
}

function error_line(string $message): void
{
    fwrite(STDERR, $message . PHP_EOL);
}

function find_app_root(): string
{
    $candidates = [];
    $env_root = getenv('FIREFLY_APP_DIR') ?: '';
    if ($env_root !== '') {
        $candidates[] = $env_root;
    }
    $candidates[] = '/var/www/html';
    $candidates[] = '/var/www/firefly-iii';
    $candidates[] = '/app';

    foreach ($candidates as $candidate) {
        if (!is_dir($candidate)) {
            continue;
        }
        if (file_exists($candidate . '/vendor/autoload.php')) {
            return $candidate;
        }
    }

    return '';
}

$email = trim((string) getenv('FIREFLY_USER_EMAIL'));
$password = (string) getenv('FIREFLY_USER_PASSWORD');

if ($email === '' || $password === '') {
    error_line('missing FIREFLY_USER_EMAIL or FIREFLY_USER_PASSWORD');
    exit(1);
}

$root = find_app_root();
if ($root === '') {
    error_line('firefly app root not found');
    exit(1);
}

$autoload = $root . '/vendor/autoload.php';
$app_bootstrap = $root . '/bootstrap/app.php';

if (!file_exists($autoload) || !file_exists($app_bootstrap)) {
    error_line('firefly bootstrap files missing');
    exit(1);
}

require $autoload;
$app = require $app_bootstrap;

$kernel = $app->make(ConsoleKernel::class);
$kernel->bootstrap();

try {
    FireflyConfig::set('single_user_mode', true);
} catch (Throwable $exc) {
    error_line('failed to enforce single_user_mode: '.$exc->getMessage());
}

$repository = $app->make(UserRepositoryInterface::class);

$existing_user = User::where('email', $email)->first();
$first_user = User::count() == 0;

if (!$existing_user) {
    $existing_user = User::create(
        [
            'email' => $email,
            'password' => bcrypt($password),
            'blocked' => false,
            'blocked_code' => null,
        ]
    );

    if ($first_user) {
        $role = Role::where('name', 'owner')->first();
        if ($role) {
            $existing_user->roles()->attach($role);
        }
    }

    log_line(sprintf('created firefly user %s', $email));
} else {
    log_line(sprintf('updating firefly user %s', $email));
}

$existing_user->blocked = false;
$existing_user->blocked_code = null;
$existing_user->save();

$repository->changePassword($existing_user, $password);
CreatesGroupMemberships::createGroupMembership($existing_user);

log_line('firefly user sync complete');
@ -8,18 +8,8 @@ resources:
  - portal-rbac.yaml
  - wger-media-pvc.yaml
  - wger-static-pvc.yaml
  - wger-admin-ensure-cronjob.yaml
  - wger-user-sync-cronjob.yaml
  - wger-deployment.yaml
  - wger-service.yaml
  - wger-ingress.yaml
generatorOptions:
  disableNameSuffixHash: true
configMapGenerator:
  - name: wger-nginx-config
    files:
      - default.conf=config/nginx.conf
      - nginx.conf=config/nginx-main.conf
  - name: wger-user-sync-script
    files:
      - wger_user_sync.py=scripts/wger_user_sync.py

@ -1,120 +0,0 @@
#!/usr/bin/env python3

from __future__ import annotations

import os
import sys

import django


def _env(name: str, default: str = "") -> str:
    value = os.getenv(name, default)
    return value.strip() if isinstance(value, str) else ""


def _setup_django() -> None:
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main")
    django.setup()


def _set_default_gym(user) -> None:
    try:
        from wger.gym.models import GymConfig
    except Exception:
        return

    try:
        config = GymConfig.objects.first()
    except Exception:
        return

    if not config or not getattr(config, "default_gym", None):
        return

    profile = getattr(user, "userprofile", None)
    if not profile or getattr(profile, "gym", None):
        return

    profile.gym = config.default_gym
    profile.save()


def _ensure_profile(user) -> None:
    profile = getattr(user, "userprofile", None)
    if not profile:
        return
    if hasattr(profile, "email_verified") and not profile.email_verified:
        profile.email_verified = True
    if hasattr(profile, "is_temporary") and profile.is_temporary:
        profile.is_temporary = False
    profile.save()


def _ensure_admin(username: str, password: str, email: str) -> None:
    from django.contrib.auth.models import User

    if not username or not password:
        raise RuntimeError("admin username/password missing")

    user, created = User.objects.get_or_create(username=username)
    if created:
        user.is_active = True
    if not user.is_staff:
        user.is_staff = True
    if email:
        user.email = email
    user.set_password(password)
    user.save()

    _ensure_profile(user)
    _set_default_gym(user)
    print(f"ensured admin user {username}")


def _ensure_user(username: str, password: str, email: str) -> None:
    from django.contrib.auth.models import User

    if not username or not password:
        raise RuntimeError("username/password missing")

    user, created = User.objects.get_or_create(username=username)
    if created:
        user.is_active = True
    if email and user.email != email:
        user.email = email
    user.set_password(password)
    user.save()

    _ensure_profile(user)
    _set_default_gym(user)
    action = "created" if created else "updated"
    print(f"{action} user {username}")


def main() -> int:
    admin_user = _env("WGER_ADMIN_USERNAME")
    admin_password = _env("WGER_ADMIN_PASSWORD")
    admin_email = _env("WGER_ADMIN_EMAIL")

    username = _env("WGER_USERNAME") or _env("ONLY_USERNAME")
    password = _env("WGER_PASSWORD")
    email = _env("WGER_EMAIL")

    if not any([admin_user and admin_password, username and password]):
        print("no admin or user payload provided; exiting")
        return 0

    _setup_django()

    if admin_user and admin_password:
        _ensure_admin(admin_user, admin_password, admin_email)

    if username and password:
        _ensure_user(username, password, email)

    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -1,120 +0,0 @@
# services/health/wger-admin-ensure-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: wger-admin-ensure
  namespace: health
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "15 3 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "health"
            vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
            vault.hashicorp.com/agent-inject-template-wger-env: |
              {{ with secret "kv/data/atlas/health/wger-db" }}
              export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
              export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
              export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
              export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
              export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
              {{ end }}
              {{ with secret "kv/data/atlas/health/wger-secrets" }}
              export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
              export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
              {{ end }}
              {{ with secret "kv/data/atlas/health/wger-admin" }}
              export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)"
              export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)"
              {{ end }}
            vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
            vault.hashicorp.com/agent-inject-template-wger-db-password: |
              {{- with secret "kv/data/atlas/health/wger-db" -}}
              {{ .Data.data.DJANGO_DB_PASSWORD }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
            vault.hashicorp.com/agent-inject-template-wger-secret-key: |
              {{- with secret "kv/data/atlas/health/wger-secrets" -}}
              {{ .Data.data.SECRET_KEY }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
            vault.hashicorp.com/agent-inject-template-wger-signing-key: |
              {{- with secret "kv/data/atlas/health/wger-secrets" -}}
              {{ .Data.data.SIGNING_KEY }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin"
            vault.hashicorp.com/agent-inject-template-wger-admin-username: |
              {{- with secret "kv/data/atlas/health/wger-admin" -}}
              {{ .Data.data.username }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin"
            vault.hashicorp.com/agent-inject-template-wger-admin-password: |
              {{- with secret "kv/data/atlas/health/wger-admin" -}}
              {{ .Data.data.password }}
              {{- end -}}
        spec:
          serviceAccountName: health-vault-sync
          restartPolicy: Never
          affinity:
            nodeAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi5"]
                - weight: 70
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi4"]
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: ensure
              image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -eu
                  . /vault/secrets/wger-env
                  exec python3 /scripts/wger_user_sync.py
              env:
                - name: SITE_URL
                  value: https://health.bstein.dev
                - name: TIME_ZONE
                  value: Etc/UTC
                - name: TZ
                  value: Etc/UTC
                - name: DJANGO_DEBUG
                  value: "False"
                - name: DJANGO_DB_ENGINE
                  value: django.db.backends.postgresql
                - name: DJANGO_CACHE_BACKEND
                  value: django.core.cache.backends.locmem.LocMemCache
                - name: DJANGO_CACHE_LOCATION
                  value: wger-cache
              volumeMounts:
                - name: wger-user-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: wger-user-sync-script
              configMap:
                name: wger-user-sync-script
                defaultMode: 0555
@@ -1,106 +0,0 @@
# services/health/wger-user-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: wger-user-sync
  namespace: health
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "0 5 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "health"
            vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db"
            vault.hashicorp.com/agent-inject-template-wger-env: |
              {{ with secret "kv/data/atlas/health/wger-db" }}
              export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}"
              export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}"
              export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}"
              export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}"
              export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)"
              {{ end }}
              {{ with secret "kv/data/atlas/health/wger-secrets" }}
              export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)"
              export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)"
              {{ end }}
            vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db"
            vault.hashicorp.com/agent-inject-template-wger-db-password: |
              {{- with secret "kv/data/atlas/health/wger-db" -}}
              {{ .Data.data.DJANGO_DB_PASSWORD }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets"
            vault.hashicorp.com/agent-inject-template-wger-secret-key: |
              {{- with secret "kv/data/atlas/health/wger-secrets" -}}
              {{ .Data.data.SECRET_KEY }}
              {{- end -}}
            vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets"
            vault.hashicorp.com/agent-inject-template-wger-signing-key: |
              {{- with secret "kv/data/atlas/health/wger-secrets" -}}
              {{ .Data.data.SIGNING_KEY }}
              {{- end -}}
        spec:
          serviceAccountName: health-vault-sync
          restartPolicy: Never
          affinity:
            nodeAffinity:
              preferredDuringSchedulingIgnoredDuringExecution:
                - weight: 100
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi5"]
                - weight: 70
                  preference:
                    matchExpressions:
                      - key: hardware
                        operator: In
                        values: ["rpi4"]
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: sync
              image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -eu
                  . /vault/secrets/wger-env
                  exec python3 /scripts/wger_user_sync.py
              env:
                - name: SITE_URL
                  value: https://health.bstein.dev
                - name: TIME_ZONE
                  value: Etc/UTC
                - name: TZ
                  value: Etc/UTC
                - name: DJANGO_DEBUG
                  value: "False"
                - name: DJANGO_DB_ENGINE
                  value: django.db.backends.postgresql
                - name: DJANGO_CACHE_BACKEND
                  value: django.core.cache.backends.locmem.LocMemCache
                - name: DJANGO_CACHE_LOCATION
                  value: wger-cache
              volumeMounts:
                - name: wger-user-sync-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: wger-user-sync-script
              configMap:
                name: wger-user-sync-script
                defaultMode: 0555
@@ -73,48 +73,6 @@ data:
         }
       }
     }
-    pipelineJob('jellyfin-oidc-plugin') {
-      definition {
-        cpsScm {
-          scm {
-            git {
-              remote {
-                url('https://scm.bstein.dev/bstein/titan-iac.git')
-                credentials('gitea-pat')
-              }
-              branches('*/main')
-            }
-          }
-          scriptPath('services/jellyfin/oidc/Jenkinsfile')
-        }
-      }
-    }
-    pipelineJob('ci-demo') {
-      properties {
-        pipelineTriggers {
-          triggers {
-            scmTrigger {
-              scmpoll_spec('H/1 * * * *')
-              ignorePostCommitHooks(false)
-            }
-          }
-        }
-      }
-      definition {
-        cpsScm {
-          scm {
-            git {
-              remote {
-                url('https://scm.bstein.dev/bstein/ci-demo.git')
-                credentials('gitea-pat')
-              }
-              branches('*/master')
-            }
-          }
-          scriptPath('Jenkinsfile')
-        }
-      }
-    }
     pipelineJob('bstein-dev-home') {
       properties {
         pipelineTriggers {
@@ -193,6 +151,84 @@ data:
         }
       }
     }
+    pipelineJob('ananke') {
+      properties {
+        pipelineTriggers {
+          triggers {
+            scmTrigger {
+              scmpoll_spec('H/5 * * * *')
+              ignorePostCommitHooks(false)
+            }
+          }
+        }
+      }
+      definition {
+        cpsScm {
+          scm {
+            git {
+              remote {
+                url('https://scm.bstein.dev/bstein/ananke.git')
+                credentials('gitea-pat')
+              }
+              branches('*/main')
+            }
+          }
+          scriptPath('Jenkinsfile')
+        }
+      }
+    }
+    pipelineJob('lesavka') {
+      properties {
+        pipelineTriggers {
+          triggers {
+            scmTrigger {
+              scmpoll_spec('H/5 * * * *')
+              ignorePostCommitHooks(false)
+            }
+          }
+        }
+      }
+      definition {
+        cpsScm {
+          scm {
+            git {
+              remote {
+                url('https://scm.bstein.dev/bstein/lesavka.git')
+                credentials('gitea-pat')
+              }
+              branches('*/master')
+            }
+          }
+          scriptPath('Jenkinsfile')
+        }
+      }
+    }
+    pipelineJob('pegasus') {
+      properties {
+        pipelineTriggers {
+          triggers {
+            scmTrigger {
+              scmpoll_spec('H/5 * * * *')
+              ignorePostCommitHooks(false)
+            }
+          }
+        }
+      }
+      definition {
+        cpsScm {
+          scm {
+            git {
+              remote {
+                url('https://scm.bstein.dev/bstein/pegasus.git')
+                credentials('gitea-pat')
+              }
+              branches('*/main')
+            }
+          }
+          scriptPath('Jenkinsfile')
+        }
+      }
+    }
     pipelineJob('data-prepper') {
       properties {
         pipelineTriggers {
@@ -23,6 +23,7 @@ resources:
   - oneoffs/synapse-oidc-secret-ensure-job.yaml
   - oneoffs/logs-oidc-secret-ensure-job.yaml
   - oneoffs/metis-oidc-secret-ensure-job.yaml
+  - oneoffs/soteria-oidc-secret-ensure-job.yaml
   - oneoffs/metis-ssh-keys-secret-ensure-job.yaml
   - oneoffs/harbor-oidc-secret-ensure-job.yaml
   - oneoffs/vault-oidc-secret-ensure-job.yaml
services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml (new file, 198 lines)
@@ -0,0 +1,198 @@
# services/keycloak/oneoffs/soteria-oidc-secret-ensure-job.yaml
# One-off job for sso/soteria-oidc-secret-ensure-1.
# Purpose: ensure the Soteria oauth2-proxy OIDC client and Vault secret exist.
# Keep this completed Job around; bump the suffix if it ever needs to be rerun.
apiVersion: batch/v1
kind: Job
metadata:
  name: soteria-oidc-secret-ensure-1
  namespace: sso
spec:
  backoffLimit: 0
  template:
    metadata:
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/agent-pre-populate-only: "true"
        vault.hashicorp.com/role: "sso-secrets"
        vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin"
        vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: |
          {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
          export KEYCLOAK_ADMIN="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}"
          export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}"
          {{ end }}
    spec:
      serviceAccountName: mas-secrets-ensure
      restartPolicy: Never
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node-role.kubernetes.io/worker
                    operator: Exists
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["arm64"]
      containers:
        - name: apply
          image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -euo pipefail
              . /vault/secrets/keycloak-admin-env.sh
              KC_URL="http://keycloak.sso.svc.cluster.local"
              ACCESS_TOKEN=""
              for attempt in 1 2 3 4 5; do
                TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \
                  -H 'Content-Type: application/x-www-form-urlencoded' \
                  -d "grant_type=password" \
                  -d "client_id=admin-cli" \
                  -d "username=${KEYCLOAK_ADMIN}" \
                  -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)"
                ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)"
                if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then
                  break
                fi
                echo "Keycloak token request failed (attempt ${attempt})" >&2
                sleep $((attempt * 2))
              done
              if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
                echo "Failed to fetch Keycloak admin token" >&2
                exit 1
              fi

              CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients?clientId=soteria" || true)"
              CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"

              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                create_payload='{"clientId":"soteria","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://backup.bstein.dev/oauth2/callback"],"webOrigins":["https://backup.bstein.dev"],"rootUrl":"https://backup.bstein.dev","baseUrl":"/"}'
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  -H 'Content-Type: application/json' \
                  -d "${create_payload}" \
                  "$KC_URL/admin/realms/atlas/clients")"
                if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then
                  echo "Keycloak client create failed (status ${status})" >&2
                  exit 1
                fi
                CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients?clientId=soteria" || true)"
                CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)"
              fi

              if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then
                echo "Keycloak client soteria not found" >&2
                exit 1
              fi

              SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)"
              if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then
                echo "Keycloak client scope groups not found" >&2
                exit 1
              fi

              DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)"
              OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)"

              if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \
                && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then
                status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                  -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                  "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                  status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \
                    -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                    "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")"
                  if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then
                    echo "Failed to attach groups client scope to soteria (status ${status})" >&2
                    exit 1
                  fi
                fi
              fi

              update_payload='{"enabled":true,"clientId":"soteria","protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://backup.bstein.dev/oauth2/callback"],"webOrigins":["https://backup.bstein.dev"],"rootUrl":"https://backup.bstein.dev","baseUrl":"/"}'
              status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \
                -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                -H 'Content-Type: application/json' \
                -d "${update_payload}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}")"
              if [ "$status" != "204" ]; then
                echo "Keycloak client update failed (status ${status})" >&2
                exit 1
              fi

              CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \
                "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)"
              if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then
                echo "Keycloak client secret not found" >&2
                exit 1
              fi

              vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}"
              vault_role="${VAULT_ROLE:-sso-secrets}"
              jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
              login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')"
              vault_token="$(curl -sS --request POST --data "${login_payload}" \
                "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')"
              if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then
                echo "vault login failed" >&2
                exit 1
              fi

              read_status="$(curl -sS -o /tmp/soteria-oidc-read.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc" || true)"
              COOKIE_SECRET=""
              if [ "${read_status}" = "200" ]; then
                COOKIE_SECRET="$(jq -r '.data.data.cookie_secret // empty' /tmp/soteria-oidc-read.json)"
              elif [ "${read_status}" != "404" ]; then
                echo "Vault read failed (status ${read_status})" >&2
                cat /tmp/soteria-oidc-read.json >&2 || true
                exit 1
              fi
              if [ -n "${COOKIE_SECRET}" ]; then
                length="$(printf '%s' "${COOKIE_SECRET}" | wc -c | tr -d ' ')"
                if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then
                  COOKIE_SECRET=""
                fi
              fi
              if [ -z "${COOKIE_SECRET}" ]; then
                COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')"
              fi

              payload="$(jq -nc \
                --arg client_id "soteria" \
                --arg client_secret "${CLIENT_SECRET}" \
                --arg cookie_secret "${COOKIE_SECRET}" \
                '{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')"
              write_status="$(curl -sS -o /tmp/soteria-oidc-write.json -w "%{http_code}" -X POST \
                -H "X-Vault-Token: ${vault_token}" \
                -H 'Content-Type: application/json' \
                -d "${payload}" "${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc")"
              if [ "${write_status}" != "200" ] && [ "${write_status}" != "204" ]; then
                echo "Vault write failed (status ${write_status})" >&2
                cat /tmp/soteria-oidc-write.json >&2 || true
                exit 1
              fi

              verify_status="$(curl -sS -o /tmp/soteria-oidc-verify.json -w "%{http_code}" \
                -H "X-Vault-Token: ${vault_token}" \
                "${vault_addr}/v1/kv/data/atlas/maintenance/soteria-oidc" || true)"
              if [ "${verify_status}" != "200" ]; then
                echo "Vault verify failed (status ${verify_status})" >&2
                cat /tmp/soteria-oidc-verify.json >&2 || true
                exit 1
              fi

              echo "Soteria OIDC secret ready in Vault"
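One detail worth keeping in mind when editing this job: oauth2-proxy only accepts cookie secrets of 16, 24, or 32 bytes, which is why the script discards a stored value of any other length before falling back to `openssl rand -hex 16` (32 hex characters, i.e. 32 bytes). A Python rendering of that decision, purely illustrative and not part of the manifests:

```python
# Illustrative only: mirror of the job's cookie-secret reuse rule. Reusing a
# valid stored secret keeps existing sessions alive; any other length would
# make oauth2-proxy refuse to start.
import secrets
from typing import Optional

def choose_cookie_secret(stored: Optional[str]) -> str:
    if stored and len(stored.encode()) in (16, 24, 32):
        return stored  # keep the existing secret so cookies stay valid
    return secrets.token_hex(16)  # 32 hex chars == 32 bytes, like `openssl rand -hex 16`
```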
@@ -31,11 +31,21 @@ spec:
          """
    }
  }
  environment {
    SUITE_NAME = 'data_prepper'
    PUSHGATEWAY_URL = 'http://platform-quality-gateway.monitoring.svc.cluster.local:9091'
    QUALITY_GATE_SONARQUBE_REPORT = 'build/sonarqube-quality-gate.json'
    QUALITY_GATE_IRONBANK_REPORT = 'build/ironbank-compliance.json'
  }
  parameters {
    string(name: 'HARBOR_REPO', defaultValue: 'registry.bstein.dev/streaming/data-prepper', description: 'Docker repository for Data Prepper')
    string(name: 'IMAGE_TAG', defaultValue: '2.8.0', description: 'Image tag to publish')
    booleanParam(name: 'PUSH_LATEST', defaultValue: true, description: 'Also push the latest tag')
  }
  options {
    disableConcurrentBuilds()
    buildDiscarder(logRotator(daysToKeepStr: '30', numToKeepStr: '200', artifactDaysToKeepStr: '30', artifactNumToKeepStr: '120'))
  }
  stages {
    stage('Checkout') {
      steps {
@@ -44,6 +54,79 @@ spec:
        }
      }
    }
    stage('Collect quality evidence') {
      steps {
        container('git') {
          sh '''
            set -euo pipefail
            apk add --no-cache curl jq >/dev/null 2>&1 || true
            mkdir -p build

            sonar_report="${QUALITY_GATE_SONARQUBE_REPORT:-build/sonarqube-quality-gate.json}"
            if [ ! -f "${sonar_report}" ]; then
              if [ -n "${SONARQUBE_HOST_URL:-}" ] && [ -n "${SONARQUBE_PROJECT_KEY:-}" ]; then
                host="${SONARQUBE_HOST_URL%/}"
                query="$(printf '%s' "${SONARQUBE_PROJECT_KEY}" | sed 's/ /%20/g')"
                sonar_ok=0
                if [ -n "${SONARQUBE_TOKEN:-}" ]; then
                  auth="$(printf '%s:' "${SONARQUBE_TOKEN}" | base64 | tr -d '\\n')"
                  if curl -fsS -H "Authorization: Basic ${auth}" "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then
                    sonar_ok=1
                  fi
                else
                  if curl -fsS "${host}/api/qualitygates/project_status?projectKey=${query}" > "${sonar_report}"; then
                    sonar_ok=1
                  fi
                fi
                if [ "${sonar_ok}" -ne 1 ]; then
                  cat > "${sonar_report}" <<EOF
{
  "status": "ERROR",
  "error": "sonarqube query failed"
}
EOF
                fi
              else
                cat > "${sonar_report}" <<EOF
{
  "status": "ERROR",
  "note": "missing SONARQUBE_HOST_URL and/or SONARQUBE_PROJECT_KEY"
}
EOF
              fi
            fi

            ironbank_report="${QUALITY_GATE_IRONBANK_REPORT:-build/ironbank-compliance.json}"
            if [ ! -f "${ironbank_report}" ]; then
              status="${IRONBANK_COMPLIANCE_STATUS:-unknown}"
              compliant="${IRONBANK_COMPLIANT:-}"
              if [ -n "${compliant}" ]; then
                compliant_lc="$(printf '%s' "${compliant}" | tr '[:upper:]' '[:lower:]')"
                compliant_json="null"
                case "${compliant_lc}" in
                  1|true|yes|on) compliant_json="true" ;;
                  0|false|no|off) compliant_json="false" ;;
                esac
                cat > "${ironbank_report}" <<EOF
{
  "status": "${status}",
  "compliant": ${compliant_json},
  "note": "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
}
EOF
              else
                cat > "${ironbank_report}" <<EOF
{
  "status": "${status}",
  "note": "Set IRONBANK_COMPLIANCE_STATUS/IRONBANK_COMPLIANT or write build/ironbank-compliance.json in image-building repos."
}
EOF
              fi
            fi
          '''
        }
      }
    }
    stage('Build & Push') {
      steps {
        container('kaniko') {
@@ -80,4 +163,97 @@ EOF
      }
    }
  }
  post {
    always {
      script {
        env.QUALITY_OUTCOME = currentBuild.currentResult == 'SUCCESS' ? 'ok' : 'failed'
      }
      container('git') {
        sh '''
          set -euo pipefail
          apk add --no-cache curl jq >/dev/null 2>&1 || true
          suite="${SUITE_NAME}"
          gateway="${PUSHGATEWAY_URL}"
          status="${QUALITY_OUTCOME:-failed}"
          fetch_counter() {
            status_name="$1"
            line="$(curl -fsS "${gateway}/metrics" 2>/dev/null | awk -v suite="${suite}" -v status="${status_name}" '
              /platform_quality_gate_runs_total/ {
                if (index($0, "job=\\"platform-quality-ci\\"") && index($0, "suite=\\"" suite "\\"") && index($0, "status=\\"" status "\\"")) {
                  print $2
                  exit
                }
              }
            ' || true)"
            [ -n "${line}" ] && printf '%s\n' "${line}" || printf '0\n'
          }
          ok_count="$(fetch_counter ok)"
          failed_count="$(fetch_counter failed)"
          if [ "${status}" = "ok" ]; then
            ok_count=$((ok_count + 1))
          else
            failed_count=$((failed_count + 1))
          fi
          sonarqube_check="not_applicable"
          if [ -f build/sonarqube-quality-gate.json ]; then
            sonar_status="$(jq -r '.status // .projectStatus.status // .qualityGate.status // empty' build/sonarqube-quality-gate.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
            if [ -n "${sonar_status}" ]; then
              case "${sonar_status}" in
                ok|pass|passed|success) sonarqube_check="ok" ;;
                *) sonarqube_check="failed" ;;
              esac
            else
              sonarqube_check="failed"
            fi
          fi
          supply_chain_check="not_applicable"
          if [ -f build/ironbank-compliance.json ]; then
            compliant="$(jq -r '.compliant // empty' build/ironbank-compliance.json 2>/dev/null)"
            if [ "${compliant}" = "true" ]; then
              supply_chain_check="ok"
            elif [ "${compliant}" = "false" ]; then
              supply_chain_check="failed"
            else
              ironbank_status="$(jq -r '.status // .result // .compliance // empty' build/ironbank-compliance.json 2>/dev/null | tr '[:upper:]' '[:lower:]')"
              case "${ironbank_status}" in
                ok|pass|passed|success|compliant) supply_chain_check="ok" ;;
                "") supply_chain_check="failed" ;;
                *) supply_chain_check="failed" ;;
              esac
            fi
          fi
          gate_glue_check="ok"
          if [ "${status}" != "ok" ]; then
            gate_glue_check="failed"
          fi
          cat <<METRICS | curl -fsS -X PUT --data-binary @- "${gateway}/metrics/job/platform-quality-ci/suite/${suite}" >/dev/null || \
            echo "warning: metrics push failed for suite=${suite}" >&2
# TYPE platform_quality_gate_runs_total counter
platform_quality_gate_runs_total{suite="${suite}",status="ok"} ${ok_count}
platform_quality_gate_runs_total{suite="${suite}",status="failed"} ${failed_count}
# TYPE data_prepper_quality_gate_tests_total gauge
data_prepper_quality_gate_tests_total{suite="${suite}",result="passed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="failed"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="error"} 0
data_prepper_quality_gate_tests_total{suite="${suite}",result="skipped"} 0
# TYPE platform_quality_gate_workspace_line_coverage_percent gauge
platform_quality_gate_workspace_line_coverage_percent{suite="${suite}"} 0
# TYPE platform_quality_gate_source_lines_over_500_total gauge
platform_quality_gate_source_lines_over_500_total{suite="${suite}"} 0
# TYPE platform_quality_gate_test_case_result gauge
platform_quality_gate_test_case_result{suite="${suite}",test="__no_test_cases__",status="skipped"} 1
# TYPE data_prepper_quality_gate_checks_total gauge
data_prepper_quality_gate_checks_total{suite="${suite}",check="tests",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="coverage",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="loc",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="docs_naming",result="not_applicable"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="gate_glue",result="${gate_glue_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="sonarqube",result="${sonarqube_check}"} 1
data_prepper_quality_gate_checks_total{suite="${suite}",check="supply_chain",result="${supply_chain_check}"} 1
METRICS
        '''
      }
      archiveArtifacts artifacts: 'build/**/*.json,build/**/*.xml,build/**/*.txt,build/**/*.rc', allowEmptyArchive: true, fingerprint: true
    }
  }
}
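The read-increment-push dance in the `post` block exists because Pushgateway replaces the whole metric group on every push rather than aggregating, so counters would otherwise reset to 1 on each build. A minimal Python sketch of the same pattern (illustrative, with assumed helper names; the metric and label names match the step above):

```python
# Sketch of the counter pattern in the post step: read the previous totals
# back from Pushgateway, bump one of them, and PUT the whole group again,
# because Pushgateway overwrites rather than accumulates.
import urllib.request

GATEWAY = "http://platform-quality-gateway.monitoring.svc.cluster.local:9091"

def current_count(suite: str, status: str) -> int:
    text = urllib.request.urlopen(f"{GATEWAY}/metrics", timeout=10).read().decode()
    for line in text.splitlines():
        if (line.startswith("platform_quality_gate_runs_total")
                and f'suite="{suite}"' in line
                and f'status="{status}"' in line
                and 'job="platform-quality-ci"' in line):
            return int(float(line.rsplit(" ", 1)[1]))
    return 0  # no previous sample for this suite/status

def push_run(suite: str, ok: bool) -> None:
    counts = {s: current_count(suite, s) for s in ("ok", "failed")}
    counts["ok" if ok else "failed"] += 1
    body = "# TYPE platform_quality_gate_runs_total counter\n" + "".join(
        f'platform_quality_gate_runs_total{{suite="{suite}",status="{s}"}} {n}\n'
        for s, n in counts.items()
    )
    req = urllib.request.Request(
        f"{GATEWAY}/metrics/job/platform-quality-ci/suite/{suite}",
        data=body.encode(), method="PUT",
    )
    urllib.request.urlopen(req, timeout=10)
```

Note that the Jenkins step pushes the full set of gauges in the same PUT for exactly this reason: anything omitted from the body would be dropped from the group.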
@@ -18,7 +18,6 @@ resources:
   - oneoffs/opensearch-ism-job.yaml
   - oneoffs/opensearch-dashboards-setup-job.yaml
   - oneoffs/opensearch-observability-setup-job.yaml
-  - opensearch-prune-cronjob.yaml
   - fluent-bit-helmrelease.yaml
   - node-log-rotation-daemonset.yaml
   - node-image-gc-rpi4-daemonset.yaml
@@ -46,12 +45,6 @@ configMapGenerator:
       - node_image_prune_rpi5.sh=scripts/node_image_prune_rpi5.sh
     options:
       disableNameSuffixHash: true
-  - name: opensearch-prune-script
-    namespace: logging
-    files:
-      - prune.py=scripts/opensearch_prune.py
-    options:
-      disableNameSuffixHash: true
   - name: opensearch-observability-script
     namespace: logging
     files:
@@ -1,48 +0,0 @@
# services/logging/opensearch-prune-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: opensearch-prune
  namespace: logging
spec:
  schedule: "23 3 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 2
      template:
        spec:
          restartPolicy: OnFailure
          nodeSelector:
            node-role.kubernetes.io/worker: "true"
            hardware: rpi5
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                  - matchExpressions:
                      - key: hardware
                        operator: In
                        values:
                          - rpi5
          containers:
            - name: prune
              image: python:3.11-alpine
              command: ["python", "/scripts/prune.py"]
              env:
                - name: OPENSEARCH_URL
                  value: http://opensearch-master.logging.svc.cluster.local:9200
                - name: LOG_LIMIT_BYTES
                  value: "1099511627776"
                - name: LOG_INDEX_PATTERNS
                  value: "kube-*,journald-*,trace-analytics-*"
              volumeMounts:
                - name: scripts
                  mountPath: /scripts
          volumes:
            - name: scripts
              configMap:
                name: opensearch-prune-script
@@ -1,77 +0,0 @@
import json
import os
import re
import sys
import urllib.error
import urllib.request

os_url = os.environ.get("OPENSEARCH_URL", "http://opensearch-master.logging.svc.cluster.local:9200").rstrip("/")
limit_bytes = int(os.environ.get("LOG_LIMIT_BYTES", str(1024**4)))
patterns = [p.strip() for p in os.environ.get("LOG_INDEX_PATTERNS", "kube-*,journald-*").split(",") if p.strip()]

UNITS = {
    "b": 1,
    "kb": 1024,
    "mb": 1024**2,
    "gb": 1024**3,
    "tb": 1024**4,
}


def parse_size(value: str) -> int:
    if not value:
        return 0
    text = value.strip().lower()
    if text in ("-", "0"):
        return 0
    match = re.match(r"^([0-9.]+)([a-z]+)$", text)
    if not match:
        return 0
    number = float(match.group(1))
    unit = match.group(2)
    if unit not in UNITS:
        return 0
    return int(number * UNITS[unit])


def request_json(path: str):
    url = f"{os_url}{path}"
    with urllib.request.urlopen(url, timeout=30) as response:
        payload = response.read().decode("utf-8")
    return json.loads(payload)


def delete_index(index: str) -> None:
    url = f"{os_url}/{index}"
    req = urllib.request.Request(url, method="DELETE")
    with urllib.request.urlopen(req, timeout=30) as response:
        _ = response.read()
    print(f"deleted {index}")


indices = []
for pattern in patterns:
    try:
        data = request_json(f"/_cat/indices/{pattern}?format=json&h=index,store.size,creation.date")
    except urllib.error.HTTPError as exc:
        if exc.code == 404:
            continue
        raise
    for item in data:
        index = item.get("index")
        if not index or index.startswith("."):
            continue
        size = parse_size(item.get("store.size", ""))
        created = int(item.get("creation.date", "0") or 0)
        indices.append({"index": index, "size": size, "created": created})

total = sum(item["size"] for item in indices)
print(f"total_log_bytes={total}")
if total <= limit_bytes:
    print("within limit")
    sys.exit(0)

indices.sort(key=lambda item: item["created"])
for item in indices:
    if total <= limit_bytes:
        break
    delete_index(item["index"])
    total -= item["size"]

print(f"remaining_log_bytes={total}")
@@ -176,6 +176,7 @@ spec:
  logLevel: DEBUG
  nodeSelector:
    hardware: rpi5
    node-role.kubernetes.io/worker: "true"
  resources:
    requests:
      cpu: 200m
@@ -14,7 +14,6 @@ resources:
   - serverstransport.yaml
   - ingressroute.yaml
   - oneoffs/mailu-sync-job.yaml
-  - mailu-sync-cronjob.yaml
   - front-lb.yaml
 
 configMapGenerator:
@@ -1,93 +0,0 @@
# services/mailu/mailu-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: mailu-sync-nightly
  namespace: mailu-mailserver
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "30 4 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "mailu-mailserver"
            vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret"
            vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: |
              {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret"
            vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: |
              {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret"
            vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: |
              {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials"
            vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: |
              {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials"
            vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: |
              {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}}
            vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret"
            vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: |
              {{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}}
        spec:
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          serviceAccountName: mailu-vault-sync
          containers:
            - name: mailu-sync
              image: python:3.11-alpine
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -euo pipefail
                  . /vault/scripts/mailu_vault_env.sh
                  pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \
                    && python /app/sync.py
              env:
                - name: KEYCLOAK_BASE_URL
                  value: http://keycloak.sso.svc.cluster.local
                - name: KEYCLOAK_REALM
                  value: atlas
                - name: MAILU_DOMAIN
                  value: bstein.dev
                - name: MAILU_DEFAULT_QUOTA
                  value: "20000000000"
                - name: MAILU_SYSTEM_USERS
                  value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev"
                - name: MAILU_DB_HOST
                  value: postgres-service.postgres.svc.cluster.local
                - name: MAILU_DB_PORT
                  value: "5432"
              volumeMounts:
                - name: sync-script
                  mountPath: /app/sync.py
                  subPath: sync.py
                - name: vault-scripts
                  mountPath: /vault/scripts
                  readOnly: true
              resources:
                requests:
                  cpu: 50m
                  memory: 128Mi
                limits:
                  cpu: 200m
                  memory: 256Mi
          volumes:
            - name: sync-script
              configMap:
                name: mailu-sync-script
                defaultMode: 0444
            - name: vault-scripts
              configMap:
                name: mailu-vault-env
                defaultMode: 0555
@@ -7,7 +7,6 @@ Sync Keycloak users to Mailu mailboxes.

import os
import sys
import json
import time
import secrets
import string
services/maintenance/NOTES.md (new file, 60 lines)
@@ -0,0 +1,60 @@
# Soteria PVC Restore Drill (backup.bstein.dev)

Use this checklist after meaningful Soteria backup, restore, auth, or alerting changes.

## Production Restore Drill Checklist

1. Verify baseline health before touching restores.
   - `flux get kustomizations -n flux-system maintenance`
   - `kubectl -n maintenance get deploy soteria oauth2-proxy-soteria`
2. Confirm operator access and source safety.
   - Operator must be in Keycloak group `admin` or `maintenance`.
   - Choose a real source PVC that is expected to be backed up, not a throwaway test PVC.
3. Run the UI flow at `https://backup.bstein.dev`.
   - Sign in via Keycloak.
   - In `PVC Inventory`, select source namespace and PVC.
   - Click `Backup now` and wait for success in `Last Action`.
   - Click `Restore` and pick a completed snapshot.
   - Set `Target namespace` and unique `Target PVC name` (`restore-<source-pvc>-<date>`).
   - Click `Create restore PVC`.
4. Validate restore output.
   - `kubectl -n <target-namespace> get pvc <target-pvc>`
   - If workload-level validation is required, attach a temporary pod and inspect expected files/data.
5. Clean up.
   - `kubectl -n <target-namespace> delete pvc <target-pvc>`
   - Remove detached restore Longhorn volume from Longhorn UI/API if one remains.

## Alert Query Verification (`maint-soteria-*`)

Start a local query endpoint:

`kubectl -n monitoring port-forward svc/victoria-metrics-k8s-stack 8428:8428`

Validate each alert expression directly.

1. `maint-soteria-refresh-stale` (`time() - soteria_inventory_refresh_timestamp_seconds`, threshold `> 900`).
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=time() - soteria_inventory_refresh_timestamp_seconds'`
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=(time() - soteria_inventory_refresh_timestamp_seconds) > bool 900'`
   - Healthy expectation: age is below `900` and threshold query returns `0`.
2. `maint-soteria-backup-unhealthy` (`sum((1 - pvc_backup_health{driver="longhorn"}) > bool 0) or on() vector(0)`, threshold `> 0`).
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=sum((1 - pvc_backup_health{driver="longhorn"}) > bool 0) or on() vector(0)'`
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=(1 - pvc_backup_health{driver="longhorn"}) > bool 0'`
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=max by (namespace,pvc) (pvc_backup_age_hours{driver="longhorn"})'`
   - Healthy expectation: unhealthy count is `0`; no series should be `1` in the per-PVC unhealthy query.
3. `maint-soteria-authz-denials` (`sum(increase(soteria_authz_denials_total[15m])) or on() vector(0)`, threshold `> 9` for 10m).
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=sum(increase(soteria_authz_denials_total[15m])) or on() vector(0)'`
   - `curl -fsS --get 'http://127.0.0.1:8428/api/v1/query' --data-urlencode 'query=sum by (reason) (increase(soteria_authz_denials_total[15m]))'`
   - Healthy expectation: total remains below `10` in normal operation; spikes should map to expected `reason` labels.

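For repeated drills it can be handy to script the three healthy-state checks; the sketch below is not part of the repo and simply evaluates the same expressions via the VictoriaMetrics query API on the port-forward above:

```python
# Sketch: run the three healthy-state checks against 127.0.0.1:8428.
import json
import urllib.parse
import urllib.request

BASE = "http://127.0.0.1:8428/api/v1/query"

CHECKS = {
    "refresh stale (expect 0)":
        "(time() - soteria_inventory_refresh_timestamp_seconds) > bool 900",
    "unhealthy backups (expect 0)":
        'sum((1 - pvc_backup_health{driver="longhorn"}) > bool 0) or on() vector(0)',
    "authz denials over 15m (expect < 10)":
        "sum(increase(soteria_authz_denials_total[15m])) or on() vector(0)",
}

def scalar(query: str) -> float:
    url = f"{BASE}?{urllib.parse.urlencode({'query': query})}"
    result = json.load(urllib.request.urlopen(url, timeout=10))["data"]["result"]
    return float(result[0]["value"][1]) if result else 0.0

for name, query in CHECKS.items():
    print(f"{name}: {scalar(query)}")
```
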
## Failure Triage

- `401/403` on UI or API:
  - Verify oauth2-proxy group claims include `admin` or `maintenance`.
- Restore conflict:
  - Target PVC already exists; choose a new target PVC name.
- `maint-soteria-refresh-stale` firing:
  - Check Soteria pod health and `/metrics` scrape reachability from `monitoring`.
- `maint-soteria-backup-unhealthy` firing:
  - Inspect `pvc_backup_health` and `pvc_backup_age_hours` to identify stale or missing backups.
- `maint-soteria-authz-denials` firing:
  - Confirm expected OIDC groups and inspect denial `reason` labels for policy or header regressions.
@@ -308,9 +308,9 @@ spec:
             - name: ARIADNE_SCHEDULE_IMAGE_SWEEPER
               value: "0 */4 * * *"
             - name: ARIADNE_SCHEDULE_VAULT_K8S_AUTH
-              value: "0 0 1 1 *"
+              value: "*/15 * * * *"
             - name: ARIADNE_SCHEDULE_VAULT_OIDC
-              value: "0 0 1 1 *"
+              value: "*/15 * * * *"
             - name: ARIADNE_SCHEDULE_COMMS_GUEST_NAME
               value: "*/5 * * * *"
             - name: ARIADNE_SCHEDULE_COMMS_PIN_INVITE
@@ -345,6 +345,22 @@ spec:
               value: "15"
             - name: ARIADNE_SCHEDULE_METIS_SENTINEL_WATCH
               value: "*/30 * * * *"
+            - name: ARIADNE_SCHEDULE_METIS_K3S_TOKEN_SYNC
+              value: "11 */6 * * *"
+            - name: ARIADNE_SCHEDULE_PLATFORM_QUALITY_SUITE_PROBE
+              value: "*/15 * * * *"
+            - name: ARIADNE_SCHEDULE_JENKINS_WORKSPACE_CLEANUP
+              value: "45 */6 * * *"
+            - name: JENKINS_WORKSPACE_NAMESPACE
+              value: jenkins
+            - name: JENKINS_WORKSPACE_PVC_PREFIX
+              value: pvc-workspace-
+            - name: JENKINS_WORKSPACE_CLEANUP_MIN_AGE_HOURS
+              value: "24"
+            - name: JENKINS_WORKSPACE_CLEANUP_DRY_RUN
+              value: "true"
+            - name: JENKINS_WORKSPACE_CLEANUP_MAX_DELETIONS_PER_RUN
+              value: "20"
             - name: METRICS_PATH
               value: "/metrics"
           resources:
@@ -16,6 +16,16 @@ rules:
   - apiGroups: [""]
     resources:
       - pods
+      - persistentvolumeclaims
+      - persistentvolumes
     verbs:
       - get
       - list
+      - watch
+      - delete
+  - apiGroups: ["longhorn.io"]
+    resources:
+      - volumes
+    verbs:
+      - get
+      - list
@@ -1,53 +0,0 @@
# services/maintenance/image-sweeper-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: image-sweeper
  namespace: maintenance
spec:
  schedule: "30 4 * * 0"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      template:
        spec:
          serviceAccountName: node-image-sweeper
          restartPolicy: OnFailure
          nodeSelector:
            kubernetes.io/os: linux
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          tolerations:
            - key: node-role.kubernetes.io/control-plane
              operator: Exists
              effect: NoSchedule
            - key: node-role.kubernetes.io/master
              operator: Exists
              effect: NoSchedule
          containers:
            - name: image-sweeper
              image: python:3.12.9-alpine3.20
              command: ["/bin/sh", "/scripts/node_image_sweeper.sh"]
              env:
                - name: ONE_SHOT
                  value: "true"
              securityContext:
                privileged: true
                runAsUser: 0
              volumeMounts:
                - name: host-root
                  mountPath: /host
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: host-root
              hostPath:
                path: /
            - name: script
              configMap:
                name: node-image-sweeper-script
                defaultMode: 0555
@@ -7,10 +7,13 @@ resources:
   - secretproviderclass.yaml
   - metis-configmap.yaml
   - metis-data-pvc.yaml
+  - soteria-configmap.yaml
   - vault-serviceaccount.yaml
   - vault-sync-deployment.yaml
   - ariadne-serviceaccount.yaml
+  - soteria-serviceaccount.yaml
   - ariadne-rbac.yaml
+  - soteria-rbac.yaml
   - disable-k3s-traefik-serviceaccount.yaml
   - disable-k3s-traefik-rbac.yaml
   - k3s-traefik-cleanup-rbac.yaml
@@ -21,20 +24,24 @@ resources:
   - pod-cleaner-rbac.yaml
   - ariadne-deployment.yaml
   - metis-deployment.yaml
+  - soteria-deployment.yaml
   - oneoffs/ariadne-migrate-job.yaml
   - ariadne-service.yaml
+  - soteria-service.yaml
   - disable-k3s-traefik-daemonset.yaml
   - oneoffs/k3s-traefik-cleanup-job.yaml
   - node-nofile-daemonset.yaml
   - metis-sentinel-amd64-daemonset.yaml
   - metis-sentinel-arm64-daemonset.yaml
-  - metis-k3s-token-sync-cronjob.yaml
   - k3s-agent-restart-daemonset.yaml
-  - pod-cleaner-cronjob.yaml
   - node-image-sweeper-serviceaccount.yaml
   - node-image-sweeper-daemonset.yaml
-  - image-sweeper-cronjob.yaml
   - metis-service.yaml
+  - soteria-networkpolicy.yaml
+  - oauth2-proxy-soteria-networkpolicy.yaml
+  - soteria-ingress.yaml
+  - soteria-certificate.yaml
+  - oauth2-proxy-soteria.yaml
   - oauth2-proxy-metis.yaml
   - metis-certificate.yaml
   - metis-ingress.yaml
@@ -43,6 +50,8 @@ images:
     newTag: 0.1.0-22 # {"$imagepolicy": "maintenance:ariadne:tag"}
   - name: registry.bstein.dev/bstein/metis
     newTag: 0.1.0-9-amd64
+  - name: registry.bstein.dev/bstein/soteria
+    newTag: 0.1.0-35 # {"$imagepolicy": "maintenance:soteria:tag"}
 configMapGenerator:
   - name: disable-k3s-traefik-script
     namespace: maintenance
@@ -62,12 +71,6 @@ configMapGenerator:
       - node_nofile.sh=scripts/node_nofile.sh
     options:
       disableNameSuffixHash: true
-  - name: pod-cleaner-script
-    namespace: maintenance
-    files:
-      - pod_cleaner.sh=scripts/pod_cleaner.sh
-    options:
-      disableNameSuffixHash: true
   - name: node-image-sweeper-script
     namespace: maintenance
     files:
@@ -1,55 +0,0 @@
# services/maintenance/metis-k3s-token-sync-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: metis-k3s-token-sync
  namespace: maintenance
spec:
  schedule: "11 */6 * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      template:
        spec:
          serviceAccountName: metis-token-sync
          restartPolicy: OnFailure
          nodeName: titan-0a
          tolerations:
            - key: node-role.kubernetes.io/control-plane
              operator: Exists
              effect: NoSchedule
            - key: node-role.kubernetes.io/master
              operator: Exists
              effect: NoSchedule
          containers:
            - name: sync
              image: hashicorp/vault:1.17.6
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
              args:
                - |
                  set -eu
                  token="$(tr -d '\n' < /host/var/lib/rancher/k3s/server/token)"
                  jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
                  VAULT_TOKEN="$(vault write -field=token auth/kubernetes/login role="${VAULT_K8S_ROLE}" jwt="${jwt}")"
                  export VAULT_TOKEN
                  vault kv put kv/atlas/maintenance/metis-runtime k3s_token="${token}"
              env:
                - name: VAULT_ADDR
                  value: http://vault.vault.svc.cluster.local:8200
                - name: VAULT_K8S_ROLE
                  value: maintenance-metis-token-sync
              securityContext:
                runAsUser: 0
              volumeMounts:
                - name: k3s-server
                  mountPath: /host/var/lib/rancher/k3s/server
                  readOnly: true
          volumes:
            - name: k3s-server
              hostPath:
                path: /var/lib/rancher/k3s/server
services/maintenance/oauth2-proxy-soteria-networkpolicy.yaml (new file, 23 lines)
@@ -0,0 +1,23 @@
# services/maintenance/oauth2-proxy-soteria-networkpolicy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: oauth2-proxy-soteria-ingress
  namespace: maintenance
spec:
  podSelector:
    matchLabels:
      app: oauth2-proxy-soteria
  policyTypes:
    - Ingress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: traefik
          podSelector:
            matchLabels:
              app: traefik
      ports:
        - protocol: TCP
          port: 4180
services/maintenance/oauth2-proxy-soteria.yaml (new file, 120 lines)
@@ -0,0 +1,120 @@
# services/maintenance/oauth2-proxy-soteria.yaml
apiVersion: v1
kind: Service
metadata:
  name: oauth2-proxy-soteria
  namespace: maintenance
  labels:
    app: oauth2-proxy-soteria
spec:
  ports:
    - name: http
      port: 80
      targetPort: 4180
  selector:
    app: oauth2-proxy-soteria

---

apiVersion: apps/v1
kind: Deployment
metadata:
  name: oauth2-proxy-soteria
  namespace: maintenance
  labels:
    app: oauth2-proxy-soteria
spec:
  replicas: 2
  selector:
    matchLabels:
      app: oauth2-proxy-soteria
  template:
    metadata:
      labels:
        app: oauth2-proxy-soteria
      annotations:
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "maintenance"
        vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/maintenance/soteria-oidc"
        vault.hashicorp.com/agent-inject-template-oidc-config: |
          {{- with secret "kv/data/atlas/maintenance/soteria-oidc" -}}
          client_id = "{{ .Data.data.client_id }}"
          client_secret = "{{ .Data.data.client_secret }}"
          cookie_secret = "{{ .Data.data.cookie_secret }}"
          {{- end -}}
    spec:
      serviceAccountName: maintenance-vault-sync
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes.io/arch
                    operator: In
                    values: ["amd64","arm64"]
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 100
              preference:
                matchExpressions:
                  - key: kubernetes.io/hostname
                    operator: NotIn
                    values: ["titan-13","titan-15","titan-17","titan-19"]
      containers:
        - name: oauth2-proxy
          image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
          imagePullPolicy: IfNotPresent
          args:
            - --provider=oidc
            - --config=/vault/secrets/oidc-config
            - --redirect-url=https://backup.bstein.dev/oauth2/callback
            - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
            - --scope=openid profile email groups
            - --email-domain=*
            - --allowed-group=admin
            - --allowed-group=/admin
            - --allowed-group=maintenance
            - --allowed-group=/maintenance
            - --set-xauthrequest=true
            - --pass-user-headers=true
            - --cookie-secure=true
            - --cookie-samesite=lax
            - --cookie-refresh=20m
            - --cookie-expire=168h
            - --insecure-oidc-allow-unverified-email=true
            - --upstream=http://soteria.maintenance.svc.cluster.local
            - --http-address=0.0.0.0:4180
            - --skip-provider-button=true
            - --approval-prompt=auto
            - --skip-jwt-bearer-tokens=true
            - --oidc-groups-claim=groups
            - --cookie-domain=backup.bstein.dev
          ports:
            - containerPort: 4180
              name: http
          readinessProbe:
            httpGet:
              path: /ping
              port: 4180
            initialDelaySeconds: 5
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /ping
              port: 4180
            initialDelaySeconds: 20
            periodSeconds: 20
          resources:
            requests:
              cpu: 25m
              memory: 64Mi
            limits:
              cpu: 250m
              memory: 256Mi
@@ -1,36 +0,0 @@
# services/maintenance/pod-cleaner-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pod-cleaner
  namespace: maintenance
spec:
  schedule: "0 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccountName: pod-cleaner
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: cleaner
              image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
              command: ["/usr/bin/env", "bash"]
              args: ["/scripts/pod_cleaner.sh"]
              volumeMounts:
                - name: script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: script
              configMap:
                name: pod-cleaner-script
                defaultMode: 0555
@@ -1,12 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

for phase in Succeeded Failed; do
  kubectl get pods -A --field-selector="status.phase=${phase}" \
    -o jsonpath='{range .items[*]}{.metadata.namespace}{" "}{.metadata.name}{"\n"}{end}' \
    | while read -r namespace name; do
      if [ -n "${namespace}" ] && [ -n "${name}" ]; then
        kubectl delete pod -n "${namespace}" "${name}" --ignore-not-found --grace-period=0 --wait=false
      fi
    done
done
@@ -13,9 +13,32 @@ spec:
       - objectName: "harbor-pull__dockerconfigjson"
         secretPath: "kv/data/atlas/shared/harbor-pull"
         secretKey: "dockerconfigjson"
+      - objectName: "soteria-restic__AWS_ACCESS_KEY_ID"
+        secretPath: "kv/data/atlas/shared/soteria-restic"
+        secretKey: "AWS_ACCESS_KEY_ID"
+      - objectName: "soteria-restic__AWS_SECRET_ACCESS_KEY"
+        secretPath: "kv/data/atlas/shared/soteria-restic"
+        secretKey: "AWS_SECRET_ACCESS_KEY"
+      - objectName: "soteria-restic__RESTIC_PASSWORD"
+        secretPath: "kv/data/atlas/shared/soteria-restic"
+        secretKey: "RESTIC_PASSWORD"
+      - objectName: "soteria-restic__AWS_ENDPOINTS"
+        secretPath: "kv/data/atlas/shared/soteria-restic"
+        secretKey: "AWS_ENDPOINTS"
   secretObjects:
     - secretName: harbor-regcred
       type: kubernetes.io/dockerconfigjson
       data:
         - objectName: harbor-pull__dockerconfigjson
           key: .dockerconfigjson
+    - secretName: soteria-restic
+      type: Opaque
+      data:
+        - objectName: soteria-restic__AWS_ACCESS_KEY_ID
+          key: AWS_ACCESS_KEY_ID
+        - objectName: soteria-restic__AWS_SECRET_ACCESS_KEY
+          key: AWS_SECRET_ACCESS_KEY
+        - objectName: soteria-restic__RESTIC_PASSWORD
+          key: RESTIC_PASSWORD
+        - objectName: soteria-restic__AWS_ENDPOINTS
+          key: AWS_ENDPOINTS
13 services/maintenance/soteria-certificate.yaml Normal file
@@ -0,0 +1,13 @@
# services/maintenance/soteria-certificate.yaml
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: backup-tls
  namespace: maintenance
spec:
  secretName: backup-tls
  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt
  dnsNames:
    - backup.bstein.dev
30 services/maintenance/soteria-configmap.yaml Normal file
@@ -0,0 +1,30 @@
# services/maintenance/soteria-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: soteria
  namespace: maintenance
data:
  SOTERIA_BACKUP_DRIVER: restic
  SOTERIA_RESTIC_SECRET_NAME: soteria-restic
  SOTERIA_RESTIC_REPOSITORY: s3:https://s3.us-west-004.backblazeb2.com/atlas-soteria/soteria
  SOTERIA_RESTIC_BACKUP_ARGS: --compression auto
  SOTERIA_S3_ENDPOINT: https://s3.us-west-004.backblazeb2.com
  SOTERIA_S3_REGION: us-west-004
  SOTERIA_LONGHORN_URL: http://longhorn-backend.longhorn-system.svc:9500
  SOTERIA_LONGHORN_BACKUP_MODE: incremental
  SOTERIA_AUTH_REQUIRED: "true"
  SOTERIA_ALLOWED_GROUPS: admin,maintenance
  SOTERIA_BACKUP_MAX_AGE_HOURS: "24"
  SOTERIA_METRICS_REFRESH_SECONDS: "300"
  SOTERIA_B2_ENABLED: "true"
  SOTERIA_B2_ENDPOINT: https://s3.us-west-004.backblazeb2.com
  SOTERIA_B2_REGION: us-west-004
  SOTERIA_B2_BUCKETS: atlas-soteria
  SOTERIA_B2_SECRET_NAMESPACE: maintenance
  SOTERIA_B2_SECRET_NAME: soteria-restic
  SOTERIA_B2_ACCESS_KEY_FIELD: AWS_ACCESS_KEY_ID
  SOTERIA_B2_SECRET_KEY_FIELD: AWS_SECRET_ACCESS_KEY
  SOTERIA_B2_ENDPOINT_FIELD: AWS_ENDPOINTS
  SOTERIA_B2_SCAN_INTERVAL_SECONDS: "900"
  SOTERIA_B2_SCAN_TIMEOUT_SECONDS: "120"
76 services/maintenance/soteria-deployment.yaml Normal file
@@ -0,0 +1,76 @@
# services/maintenance/soteria-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: soteria
  namespace: maintenance
  labels:
    app: soteria
spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: soteria
  template:
    metadata:
      labels:
        app: soteria
    spec:
      serviceAccountName: soteria
      nodeSelector:
        kubernetes.io/arch: arm64
        node-role.kubernetes.io/worker: "true"
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 90
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5"]
            - weight: 50
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi4"]
      containers:
        - name: soteria
          image: registry.bstein.dev/bstein/soteria:0.1.0-21
          imagePullPolicy: Always
          envFrom:
            - configMapRef:
                name: soteria
          ports:
            - name: http
              containerPort: 8080
          livenessProbe:
            httpGet:
              path: /healthz
              port: http
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 2
          readinessProbe:
            httpGet:
              path: /readyz
              port: http
            initialDelaySeconds: 2
            periodSeconds: 5
            timeoutSeconds: 2
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              cpu: 200m
              memory: 256Mi
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
            runAsNonRoot: true
            runAsUser: 65532
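The probe endpoints declared in this Deployment can be exercised from a workstation for a quick smoke check. A minimal sketch, assuming `kubectl port-forward -n maintenance svc/soteria 8080:80` is running locally; only the /healthz and /readyz paths come from the manifest, everything else is illustrative.

# Minimal smoke check for the probe endpoints declared above. Assumes a
# local port-forward to the soteria Service; paths come from the manifest.
import sys
import urllib.request

BASE = "http://127.0.0.1:8080"  # assumption: local port-forward target

def check(path: str) -> bool:
    try:
        with urllib.request.urlopen(f"{BASE}{path}", timeout=2) as resp:
            print(f"{path}: HTTP {resp.status}")
            return resp.status == 200
    except OSError as err:
        print(f"{path}: {err}")
        return False

if __name__ == "__main__":
    # Evaluate both endpoints even if the first one fails.
    results = [check(p) for p in ("/healthz", "/readyz")]
    sys.exit(0 if all(results) else 1)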
27 services/maintenance/soteria-ingress.yaml Normal file
@@ -0,0 +1,27 @@
# services/maintenance/soteria-ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: soteria
  namespace: maintenance
  annotations:
    kubernetes.io/ingress.class: traefik
    traefik.ingress.kubernetes.io/router.entrypoints: websecure
    traefik.ingress.kubernetes.io/router.tls: "true"
    traefik.ingress.kubernetes.io/router.middlewares: ""
spec:
  ingressClassName: traefik
  tls:
    - hosts: ["backup.bstein.dev"]
      secretName: backup-tls
  rules:
    - host: backup.bstein.dev
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: oauth2-proxy-soteria
                port:
                  number: 80
27 services/maintenance/soteria-networkpolicy.yaml Normal file
@@ -0,0 +1,27 @@
# services/maintenance/soteria-networkpolicy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: soteria-ingress
  namespace: maintenance
spec:
  podSelector:
    matchLabels:
      app: soteria
  policyTypes:
    - Ingress
  ingress:
    - from:
        - podSelector:
            matchLabels:
              app: oauth2-proxy-soteria
      ports:
        - protocol: TCP
          port: 8080
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - protocol: TCP
          port: 8080
34 services/maintenance/soteria-rbac.yaml Normal file
@@ -0,0 +1,34 @@
# services/maintenance/soteria-rbac.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: soteria
rules:
  - apiGroups: [""]
    resources: ["persistentvolumeclaims", "persistentvolumes"]
    verbs: ["get", "list"]
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list"]
  - apiGroups: [""]
    resources: ["pods/log"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["get", "list", "create", "update", "delete"]
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["get", "list", "create", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: soteria
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: soteria
subjects:
  - kind: ServiceAccount
    name: soteria
    namespace: maintenance
21 services/maintenance/soteria-service.yaml Normal file
@@ -0,0 +1,21 @@
# services/maintenance/soteria-service.yaml
apiVersion: v1
kind: Service
metadata:
  name: soteria
  namespace: maintenance
  labels:
    app: soteria
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "80"
    prometheus.io/path: "/metrics"
spec:
  type: ClusterIP
  selector:
    app: soteria
  ports:
    - name: http
      port: 80
      targetPort: http
9 services/maintenance/soteria-serviceaccount.yaml Normal file
@@ -0,0 +1,9 @@
# services/maintenance/soteria-serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: soteria
  namespace: maintenance
imagePullSecrets:
  - name: harbor-regcred
@@ -13,7 +13,11 @@ spec:
    metadata:
      labels:
        app: maintenance-vault-sync
      annotations:
        maintenance.bstein.dev/restart-at: "2026-04-13T05:57:00Z"
    spec:
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      serviceAccountName: maintenance-vault-sync
      containers:
        - name: sync
File diff suppressed because it is too large
@@ -584,6 +584,44 @@
        }
      },
      "timeFrom": "30d"
    },
    {
      "id": 9,
      "type": "timeseries",
      "title": "Astraios Usage",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 9,
        "w": 24,
        "x": 0,
        "y": 44
      },
      "targets": [
        {
          "expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent"
        },
        "overrides": []
      },
      "options": {
        "legend": {
          "displayMode": "table",
          "placement": "right"
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "timeFrom": "30d"
    }
  ],
  "time": {
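The Astraios expression joins per-instance filesystem usage onto node names via node_uname_info. To eyeball its output outside Grafana it can be evaluated against the datasource's Prometheus-compatible HTTP API; a sketch below, where the local URL is an assumption (for example after port-forwarding the datasource) and the query string is taken verbatim from the panel.

# Sketch: evaluate the Astraios usage expression via the Prometheus HTTP API.
# PROM_URL is an assumption; the query comes from the panel definition above.
import json
import urllib.parse
import urllib.request

PROM_URL = "http://127.0.0.1:9090"  # e.g. a local port-forward to the datasource
QUERY = (
    'avg by (node) ((avg by (instance) ((1 - '
    '(node_filesystem_avail_bytes{mountpoint="/mnt/astraios",fstype!~"tmpfs|overlay"} / '
    'node_filesystem_size_bytes{mountpoint="/mnt/astraios",fstype!~"tmpfs|overlay"})) * 100)) '
    '* on(instance) group_left(node) '
    'label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)"))'
)

url = f"{PROM_URL}/api/v1/query?" + urllib.parse.urlencode({"query": QUERY})
with urllib.request.urlopen(url, timeout=5) as resp:
    for sample in json.load(resp)["data"]["result"]:
        # Each sample carries the joined "node" label and the usage percentage.
        print(f'{sample["metric"].get("node", "?")}: {float(sample["value"][1]):.1f}%')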
@@ -129,7 +129,7 @@
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -204,7 +204,7 @@
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -352,7 +352,7 @@
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -427,7 +427,7 @@
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -565,7 +565,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -642,7 +642,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -711,7 +711,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -780,7 +780,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -855,7 +855,7 @@
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -930,7 +930,7 @@
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -997,7 +997,7 @@
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -1064,7 +1064,7 @@
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -1281,11 +1281,13 @@
        "fields": "",
        "values": false
      },
      "textMode": "name_and_value"
      "textMode": "name_and_value",
      "orientation": "horizontal",
      "wideLayout": true
    },
    "links": [
      {
        "title": "Open atlas-power dashboard",
        "title": "Open Atlas Power",
        "url": "/d/atlas-power",
        "targetBlank": true
      }
@@ -1316,11 +1318,6 @@
        "refId": "B",
        "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
        "legendFormat": "Statera"
      },
      {
        "refId": "C",
        "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
        "legendFormat": "combined"
      }
    ],
    "fieldConfig": {
@@ -1340,7 +1337,7 @@
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1363,14 +1360,26 @@
      "targets": [
        {
          "refId": "A",
          "expr": "max(atlas_climate_tent_temperature_celsius) or max(atlas_climate_temperature_celsius) or on() vector(0)",
          "expr": "max(typhon_temperature_celsius) or on() vector(0)",
          "legendFormat": "Tent Temp (\u00b0C)",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "max(atlas_climate_tent_pressure_kpa) or max(atlas_climate_pressure_kpa) or on() vector(0)",
          "legendFormat": "Tent Pressure (kPa)",
          "expr": "max(typhon_vpd_kpa) or on() vector(0)",
          "legendFormat": "Tent VPD (kPa)",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "max(typhon_relative_humidity_percent) or on() vector(0)",
          "legendFormat": "Tent RH (%)",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)",
          "legendFormat": "Dew Point (\u00b0C)",
          "instant": true
        }
      ],
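The refId D target encodes the Magnus dew-point approximation in PromQL, with clamp_min guarding against ln(0) at zero humidity. In conventional notation, using the constants from the expression itself:

% Magnus dew-point approximation as written in the refId D expression,
% with a = 17.62 and b = 243.12 °C:
\gamma(T, RH) = \ln\!\left(\frac{RH}{100}\right) + \frac{aT}{b + T},
\qquad
T_{\mathrm{dew}} = \frac{b\,\gamma(T, RH)}{a - \gamma(T, RH)}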
@@ -1415,7 +1424,7 @@
    {
      "matcher": {
        "id": "byName",
        "options": "Tent Pressure (kPa)"
        "options": "Tent VPD (kPa)"
      },
      "properties": [
        {
@@ -1423,6 +1432,30 @@
          "value": "suffix:kPa"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "Tent RH (%)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "percent"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "Dew Point (\u00b0C)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "celsius"
        }
      ]
    }
  ]
},
@@ -1438,17 +1471,17 @@
        "values": false
      },
      "textMode": "value",
      "orientation": "vertical",
      "wideLayout": false
      "orientation": "horizontal",
      "wideLayout": true
    },
    "links": [
      {
        "title": "Open atlas-power dashboard",
        "title": "Open Atlas Power",
        "url": "/d/atlas-power",
        "targetBlank": true
      }
    ],
    "description": "Current tent temperature and pressure."
    "description": "Current tent temperature, humidity, VPD, and dew point."
  },
  {
    "id": 43,
@@ -1467,13 +1500,23 @@
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_tent_temperature_celsius or atlas_climate_temperature_celsius)",
          "expr": "typhon_temperature_celsius",
          "legendFormat": "Temperature (\u00b0C)"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_tent_pressure_kpa or atlas_climate_pressure_kpa)",
          "legendFormat": "Pressure (kPa)"
          "expr": "typhon_relative_humidity_percent",
          "legendFormat": "Humidity (%)"
        },
        {
          "refId": "C",
          "expr": "typhon_vpd_kpa",
          "legendFormat": "VPD (kPa)"
        },
        {
          "refId": "D",
          "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))",
          "legendFormat": "Dew Point (\u00b0C)"
        }
      ],
      "fieldConfig": {
@@ -1484,7 +1527,19 @@
    {
      "matcher": {
        "id": "byName",
        "options": "Pressure (kPa)"
        "options": "Humidity (%)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "percent"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "VPD (kPa)"
      },
      "properties": [
        {
@@ -1518,11 +1573,12 @@
    },
    "links": [
      {
        "title": "Open atlas-power dashboard",
        "title": "Open Atlas Power",
        "url": "/d/atlas-power",
        "targetBlank": true
      }
    ]
    ],
    "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis)."
  },
  {
    "id": 140,
@@ -1541,25 +1597,25 @@
      "targets": [
        {
          "refId": "A",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outlet\"}) or max(atlas_climate_fan_activity_level{position=\"outlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))",
          "legendFormat": "Inside Outlet",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"inside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))",
          "legendFormat": "Inside Inlet",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"outside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))",
          "legendFormat": "Outside Inlet",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"interior\"}) or max(atlas_climate_fan_activity_level{position=\"interior\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))",
          "legendFormat": "Interior Fans",
          "instant": true
        }
@@ -1606,11 +1662,13 @@
        "fields": "",
        "values": false
      },
      "textMode": "name_and_value"
      "textMode": "name_and_value",
      "orientation": "horizontal",
      "wideLayout": true
    },
    "links": [
      {
        "title": "Open atlas-power dashboard",
        "title": "Open Atlas Power",
        "url": "/d/atlas-power",
        "targetBlank": true
      }
@@ -1633,22 +1691,22 @@
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outlet\"} or atlas_climate_fan_activity_level{position=\"outlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}",
          "legendFormat": "Inside Outlet"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"} or atlas_climate_fan_activity_level{position=\"inside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}",
          "legendFormat": "Inside Inlet"
        },
        {
          "refId": "C",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"} or atlas_climate_fan_activity_level{position=\"outside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}",
          "legendFormat": "Outside Inlet"
        },
        {
          "refId": "D",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"interior\"} or atlas_climate_fan_activity_level{position=\"interior\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"interior\"}",
          "legendFormat": "Interior Fans"
        }
      ],
@@ -1670,7 +1728,7 @@
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1741,7 +1799,7 @@
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1838,7 +1896,7 @@
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1861,53 +1919,58 @@
      "targets": [
        {
          "refId": "A",
          "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ariadne"
        },
        {
          "refId": "B",
          "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "metis"
        },
        {
          "refId": "C",
          "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ananke"
        },
        {
          "refId": "D",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "atlasbot"
        },
        {
          "refId": "E",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "lesavka"
        },
        {
          "refId": "F",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "pegasus"
        },
        {
          "refId": "G",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "soteria"
        },
        {
          "refId": "H",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "titan-iac"
        },
        {
          "refId": "I",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "bstein-home"
        },
        {
          "refId": "J",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "arcanagon"
        },
        {
          "refId": "K",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "data-prepper"
        }
      ],
      "fieldConfig": {
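Every target above follows the same shape, differing only in the suite label. In formula terms, for a suite s over the window w = 1h:

% Per-suite pass rate as encoded in each target; runs_ok counts increases of
% platform_quality_gate_runs_total with status =~ "ok|passed|success":
\mathrm{pass}_s(w) = 100 \cdot
  \frac{\sum \operatorname{increase}\!\big(\mathrm{runs\_ok}_s[w]\big)}
       {\max\!\big(\sum \operatorname{increase}(\mathrm{runs}_s[w]),\, 1\big)},
\quad\text{kept only while } \sum \operatorname{increase}(\mathrm{runs}_s[w]) > 0,
\text{ otherwise pinned to } 0 \text{ by } \texttt{or on() vector(0)}.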
@@ -1941,7 +2004,7 @@
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1952,7 +2015,7 @@
    {
      "id": 47,
      "type": "bargauge",
      "title": "Platform Suite Pass Rate (24h)",
      "title": "PVC Backup Health / Age",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@@ -1965,31 +2028,35 @@
      },
      "targets": [
        {
          "expr": "sort_desc((label_replace((100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[24h]))) / clamp_min((sum(increase(ariadne_task_runs_total[24h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[24h]))) > 0), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace((100 * ((sum(increase(metis_builds_total{status=\"ok\"}[24h])) + sum(increase(metis_flashes_total{status=\"ok\"}[24h])))) / clamp_min(((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))), 1)) and on() (((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))) > 0), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace((100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[24h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))) > 0), \"suite\", \"ananke\", \"__name__\", \".*\")) or ((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))) > 0)))",
          "expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
          "refId": "A",
          "legendFormat": "{{suite}}",
          "legendFormat": "{{namespace}}/{{pvc}}",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "unit": "h",
          "min": 0,
          "max": 100,
          "max": null,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 80
                "value": 20
              },
              {
                "color": "green",
                "value": 95
                "color": "orange",
                "value": 40
              },
              {
                "color": "red",
                "value": 50
              }
            ]
          }
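The sort expression folds backup health into the displayed age: where the age series is missing, the health gauge substitutes for it, so an unhealthy or absent backup is pinned at 999 h and sorts to the top of the bar gauge:

% Displayed age per PVC p, as encoded in the sort_desc expression:
\mathrm{age}(p) =
\begin{cases}
\text{pvc\_backup\_age\_hours}(p) & \text{if the age series exists} \\
\big(1 - \text{pvc\_backup\_health}(p)\big) \cdot 999 & \text{otherwise (unhealthy} \Rightarrow 999\,\mathrm{h})
\end{cases}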
@@ -2020,12 +2087,12 @@
      ],
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "url": "/d/atlas-jobs",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
      ],
      "description": "24-hour per-suite pass-rate snapshot. This complements the 7-day trend by showing each suite's current quality posture."
      "description": "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible."
    },
    {
      "id": 30,
@@ -2088,7 +2155,7 @@
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2193,7 +2260,7 @@
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2269,7 +2336,7 @@
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2345,7 +2412,7 @@
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2725,7 +2792,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -2772,7 +2839,7 @@
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -3022,7 +3089,7 @@
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3066,7 +3133,7 @@
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3110,7 +3177,7 @@
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3158,7 +3225,7 @@
      "timeFrom": "30d",
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -3166,8 +3233,8 @@
    },
    {
      "id": 22,
      "type": "bargauge",
      "title": "Nodes Closest to Full Root Disks",
      "type": "timeseries",
      "title": "Nodes Closest to Full Astraios Disks",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@@ -3180,68 +3247,36 @@
      },
      "targets": [
        {
          "expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
          "expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "min": 0,
          "max": 100,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 50
              },
              {
                "color": "orange",
                "value": 75
              },
              {
                "color": "red",
                "value": 91.5
              }
            ]
          }
          "unit": "percent"
        },
        "overrides": []
      },
      "options": {
        "displayMode": "gradient",
        "orientation": "horizontal",
        "reduceOptions": {
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
            "last"
          ]
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "timeFrom": "1w",
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
      ],
      "transformations": [
        {
          "id": "sortBy",
          "options": {
            "fields": [
              "Value"
            ],
            "order": "desc"
          }
        }
      ]
    }
  ],
@@ -3362,5 +3397,11 @@
    "to": "now"
  },
  "refresh": "1m",
  "links": []
  "links": [
    {
      "title": "Atlas Testing (Internal)",
      "url": "/d/atlas-jobs",
      "targetBlank": false
    }
  ]
}
@@ -215,7 +215,9 @@
        "fields": "",
        "values": false
      },
      "textMode": "name_and_value"
      "textMode": "name_and_value",
      "orientation": "horizontal",
      "wideLayout": true
    },
    "description": "Per-UPS live snapshot: current draw in watts, estimated battery runtime if discharge started now, and charging/discharging status."
  },
@@ -243,11 +245,6 @@
        "refId": "B",
        "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
        "legendFormat": "Statera"
      },
      {
        "refId": "C",
        "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
        "legendFormat": "combined"
      }
    ],
    "fieldConfig": {
@@ -265,7 +262,7 @@
        "mode": "multi"
      }
    },
    "description": "Historical UPS power consumption in watts for titan-db, tethys, and combined load."
    "description": "Historical UPS power consumption in watts for titan-db and tethys."
  },
  {
    "id": 3,
@@ -284,14 +281,26 @@
      "targets": [
        {
          "refId": "A",
          "expr": "max(atlas_climate_tent_temperature_celsius) or max(atlas_climate_temperature_celsius) or on() vector(0)",
          "expr": "max(typhon_temperature_celsius) or on() vector(0)",
          "legendFormat": "Tent Temp (\u00b0C)",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "max(atlas_climate_tent_pressure_kpa) or max(atlas_climate_pressure_kpa) or on() vector(0)",
          "legendFormat": "Tent Pressure (kPa)",
          "expr": "max(typhon_vpd_kpa) or on() vector(0)",
          "legendFormat": "Tent VPD (kPa)",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "max(typhon_relative_humidity_percent) or on() vector(0)",
          "legendFormat": "Tent RH (%)",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)",
          "legendFormat": "Dew Point (\u00b0C)",
          "instant": true
        }
      ],
@@ -336,12 +345,36 @@
    {
      "matcher": {
        "id": "byName",
        "options": "Tent Pressure (kPa)"
        "options": "Tent VPD (kPa)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "none"
          "value": "suffix:kPa"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "Tent RH (%)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "percent"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "Dew Point (\u00b0C)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "celsius"
        }
      ]
    }
@@ -358,9 +391,11 @@
        "fields": "",
        "values": false
      },
      "textMode": "name_and_value"
      "textMode": "name_and_value",
      "orientation": "horizontal",
      "wideLayout": true
    },
    "description": "Current tent temperature and air pressure. These render once climate telemetry is online."
    "description": "Current tent temperature, humidity, VPD, and dew point. These render once Typhon climate telemetry is online."
  },
  {
    "id": 4,
@@ -379,13 +414,23 @@
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_tent_temperature_celsius or atlas_climate_temperature_celsius)",
          "expr": "typhon_temperature_celsius",
          "legendFormat": "Temperature (\u00b0C)"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_tent_pressure_kpa or atlas_climate_pressure_kpa)",
          "legendFormat": "Pressure (kPa)"
          "expr": "typhon_relative_humidity_percent",
          "legendFormat": "Humidity (%)"
        },
        {
          "refId": "C",
          "expr": "typhon_vpd_kpa",
          "legendFormat": "VPD (kPa)"
        },
        {
          "refId": "D",
          "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))",
          "legendFormat": "Dew Point (\u00b0C)"
        }
      ],
      "fieldConfig": {
@@ -396,7 +441,19 @@
    {
      "matcher": {
        "id": "byName",
        "options": "Pressure (kPa)"
        "options": "Humidity (%)"
      },
      "properties": [
        {
          "id": "unit",
          "value": "percent"
        }
      ]
    },
    {
      "matcher": {
        "id": "byName",
        "options": "VPD (kPa)"
      },
      "properties": [
        {
@@ -428,7 +485,7 @@
        "mode": "multi"
      }
    },
    "description": "Two-axis chart: tent temperature (left axis) and tent pressure in kPa (right axis)."
    "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and tent VPD in kPa (right axis)."
  },
  {
    "id": 5,
@@ -447,25 +504,25 @@
      "targets": [
        {
          "refId": "A",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outlet\"}) or max(atlas_climate_fan_activity_level{position=\"outlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))",
          "legendFormat": "Inside Outlet",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"inside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))",
          "legendFormat": "Inside Inlet",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"outside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))",
          "legendFormat": "Outside Inlet",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"interior\"}) or max(atlas_climate_fan_activity_level{position=\"interior\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))",
          "legendFormat": "Interior Fans",
          "instant": true
        }
@@ -512,7 +569,9 @@
        "fields": "",
        "values": false
      },
      "textMode": "name_and_value"
      "textMode": "name_and_value",
      "orientation": "horizontal",
      "wideLayout": true
    },
    "description": "Current fan activity levels (0-10): inside outlet, inside inlet, outside inlet, and interior fans."
  },
@@ -533,22 +592,22 @@
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outlet\"} or atlas_climate_fan_activity_level{position=\"outlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}",
          "legendFormat": "Inside Outlet"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"} or atlas_climate_fan_activity_level{position=\"inside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}",
          "legendFormat": "Inside Inlet"
        },
        {
          "refId": "C",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"} or atlas_climate_fan_activity_level{position=\"outside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}",
          "legendFormat": "Outside Inlet"
        },
        {
          "refId": "D",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"interior\"} or atlas_climate_fan_activity_level{position=\"interior\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"interior\"}",
          "legendFormat": "Interior Fans"
        }
      ],
@@ -131,7 +131,7 @@ data:
                type: threshold
                conditions:
                  - evaluator:
                      params: [3]
                      params: [2]
                      type: gt
                    operator:
                      type: and
@@ -447,6 +447,198 @@ data:
            summary: "Legacy cronjob alert disabled"
          labels:
            severity: info
        - uid: maint-soteria-refresh-stale
          title: "Soteria inventory refresh stale (>15m)"
          condition: C
          for: "15m"
          data:
            - refId: A
              relativeTimeRange:
                from: 900
                to: 0
              datasourceUid: atlas-vm
              model:
                expr: time() - soteria_inventory_refresh_timestamp_seconds
                intervalMs: 60000
                maxDataPoints: 43200
                legendFormat: soteria-refresh-age-seconds
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [900]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: Alerting
          execErrState: Alerting
          annotations:
            summary: "Soteria inventory telemetry has not refreshed in >15m"
          labels:
            severity: warning
        - uid: maint-soteria-backup-unhealthy
          title: "Soteria reports unhealthy PVC backups"
          condition: C
          for: "10m"
          data:
            - refId: A
              relativeTimeRange:
                from: 600
                to: 0
              datasourceUid: atlas-vm
              model:
                expr: sum((1 - pvc_backup_health) > bool 0) or on() vector(0)
                intervalMs: 60000
                maxDataPoints: 43200
                legendFormat: unhealthy-pvcs
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [0]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Alerting
          annotations:
            summary: "One or more PVCs are stale, missing, or failed per Soteria backup health"
          labels:
            severity: warning
        - uid: maint-soteria-b2-scan-unhealthy
          title: "Soteria B2 usage scan failing or stale"
          condition: C
          for: "15m"
          data:
            - refId: A
              relativeTimeRange:
                from: 1800
                to: 0
              datasourceUid: atlas-vm
              model:
                expr: sum((((soteria_b2_scan_success < bool 1) and (time() - soteria_b2_scan_timestamp_seconds > 600)) or (time() - soteria_b2_scan_timestamp_seconds > 1800))) or on() vector(0)
                intervalMs: 60000
                maxDataPoints: 43200
                legendFormat: soteria-b2-scan-unhealthy
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [0]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Alerting
          annotations:
            summary: "Soteria B2 consumption scan is failing or stale for >15m"
          labels:
            severity: warning
        - uid: maint-soteria-authz-denials
          title: "Soteria authorization denials elevated"
          condition: C
          for: "10m"
          data:
            - refId: A
              relativeTimeRange:
                from: 900
                to: 0
              datasourceUid: atlas-vm
              model:
                expr: sum(increase(soteria_authz_denials_total[15m])) or on() vector(0)
                intervalMs: 60000
                maxDataPoints: 43200
                legendFormat: soteria-authz-denials-15m
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [9]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Alerting
          annotations:
            summary: "Soteria saw >=10 authorization denials in 15m"
          labels:
            severity: warning
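For context, the soteria_* and pvc_backup_* series these rules evaluate are ordinary Prometheus metrics. A sketch of how an exporter might publish them with prometheus_client follows; the metric names are taken from the rules above, while the port and refresh cadence are assumptions.

# Illustrative only: publishes the series the Soteria alert rules above
# evaluate. Metric names come from the rules; everything else is assumed.
import time
from prometheus_client import Counter, Gauge, start_http_server

inventory_refresh_ts = Gauge(
    "soteria_inventory_refresh_timestamp_seconds",
    "Unix time of the last successful inventory refresh",
)
pvc_backup_health = Gauge(
    "pvc_backup_health",
    "1 if the PVC's latest backup is healthy, else 0",
    ["namespace", "pvc"],
)
authz_denials = Counter(
    "soteria_authz_denials_total",
    "Requests rejected by group-based authorization",
)

if __name__ == "__main__":
    start_http_server(8080)  # port is illustrative
    while True:
        # Keeping this fresh is exactly what maint-soteria-refresh-stale checks.
        inventory_refresh_ts.set_to_current_time()
        pvc_backup_health.labels("maintenance", "example-pvc").set(1)
        time.sleep(300)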
    - orgId: 1
      name: ariadne
      folder: Alerts
@@ -649,3 +841,440 @@ data:
            summary: "Postmark exporter reports sustained API outage"
          labels:
            severity: warning
    - orgId: 1
      name: typhon
      folder: Alerts
      interval: 1m
      rules:
        - uid: typhon-exporter-down
          title: "Typhon exporter down (>10m)"
          condition: C
          for: "10m"
          data:
            - refId: A
              relativeTimeRange:
                from: 600
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: max(typhon_up) or on() vector(0)
                legendFormat: typhon_up
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [1]
                      type: lt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: Alerting
          execErrState: Alerting
          annotations:
            summary: "Typhon has been down for >10m"
          labels:
            severity: critical
        - uid: typhon-data-stale
          title: "Typhon data stale (>180s for 10m)"
          condition: C
          for: "10m"
          data:
            - refId: A
              relativeTimeRange:
                from: 600
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: max(typhon_data_age_seconds) or on() vector(0)
                legendFormat: data age
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [180]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: NoData
          execErrState: Error
          annotations:
            summary: "Typhon data age >180s for >10m"
          labels:
            severity: warning
        - uid: typhon-auth-failures
          title: "Typhon auth failures burst"
          condition: C
          for: "5m"
          data:
            - refId: A
              relativeTimeRange:
                from: 600
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: sum(increase(typhon_poll_errors_total{reason="auth"}[10m])) or on() vector(0)
                legendFormat: auth failures 10m
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [3]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: NoData
          execErrState: Error
          annotations:
            summary: "Typhon auth failures exceeded threshold in 10m"
          labels:
            severity: critical
        - uid: typhon-api-errors
          title: "Typhon API/timeouts burst"
          condition: C
          for: "15m"
          data:
            - refId: A
              relativeTimeRange:
                from: 900
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: sum(increase(typhon_poll_errors_total{reason=~"api|timeout|unknown"}[15m])) or on() vector(0)
                legendFormat: poll errors 15m
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [10]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Error
          annotations:
            summary: "Typhon API/timeouts exceeded threshold in 15m"
          labels:
            severity: warning
        - uid: typhon-temp-critical
          title: "Tent temperature critical (>34C)"
          condition: C
          for: "10m"
          data:
            - refId: A
              relativeTimeRange:
                from: 600
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: max(typhon_temperature_celsius) or on() vector(0)
                legendFormat: max temp
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [34]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Error
          annotations:
            summary: "Typhon reports tent temperature >34C for >10m"
          labels:
            severity: critical
        - uid: typhon-humidity-high
          title: "Tent humidity high (>75%)"
          condition: C
          for: "20m"
          data:
            - refId: A
              relativeTimeRange:
                from: 1200
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: max(typhon_relative_humidity_percent) or on() vector(0)
                legendFormat: max humidity
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [75]
                      type: gt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Error
          annotations:
            summary: "Typhon reports relative humidity >75% for >20m"
          labels:
            severity: warning
        - uid: typhon-humidity-low
          title: "Tent humidity low (<30%)"
          condition: C
          for: "20m"
          data:
            - refId: A
              relativeTimeRange:
                from: 1200
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: min(typhon_relative_humidity_percent)
                legendFormat: min humidity
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
                maxDataPoints: 43200
                type: threshold
                conditions:
                  - evaluator:
                      params: [30]
                      type: lt
                    operator:
                      type: and
                    reducer:
                      type: last
                    type: query
          noDataState: OK
          execErrState: Error
          annotations:
            summary: "Typhon reports relative humidity <30% for >20m"
          labels:
            severity: warning
        - uid: typhon-vpd-high
          title: "Tent VPD high (>2.0 kPa)"
          condition: C
          for: "20m"
          data:
            - refId: A
              relativeTimeRange:
                from: 1200
                to: 0
              datasourceUid: atlas-vm
              model:
                intervalMs: 60000
                maxDataPoints: 43200
                expr: max(typhon_vpd_kpa) or on() vector(0)
                legendFormat: max vpd
                datasource:
                  type: prometheus
                  uid: atlas-vm
            - refId: B
              datasourceUid: __expr__
              model:
                expression: A
                intervalMs: 60000
                maxDataPoints: 43200
                reducer: last
                type: reduce
            - refId: C
              datasourceUid: __expr__
              model:
                expression: B
                intervalMs: 60000
maxDataPoints: 43200
|
||||
type: threshold
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: [2.0]
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
reducer:
|
||||
type: last
|
||||
type: query
|
||||
noDataState: OK
|
||||
execErrState: Error
|
||||
annotations:
|
||||
summary: "Typhon reports VPD >2.0 kPa for >20m"
|
||||
labels:
|
||||
severity: warning
|
||||
- uid: typhon-vpd-low
|
||||
title: "Tent VPD low (<0.4 kPa)"
|
||||
condition: C
|
||||
for: "20m"
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 1200
|
||||
to: 0
|
||||
datasourceUid: atlas-vm
|
||||
model:
|
||||
intervalMs: 60000
|
||||
maxDataPoints: 43200
|
||||
expr: min(typhon_vpd_kpa)
|
||||
legendFormat: min vpd
|
||||
datasource:
|
||||
type: prometheus
|
||||
uid: atlas-vm
|
||||
- refId: B
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
expression: A
|
||||
intervalMs: 60000
|
||||
maxDataPoints: 43200
|
||||
reducer: last
|
||||
type: reduce
|
||||
- refId: C
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
expression: B
|
||||
intervalMs: 60000
|
||||
maxDataPoints: 43200
|
||||
type: threshold
|
||||
conditions:
|
||||
- evaluator:
|
||||
params: [0.4]
|
||||
type: lt
|
||||
operator:
|
||||
type: and
|
||||
reducer:
|
||||
type: last
|
||||
type: query
|
||||
noDataState: OK
|
||||
execErrState: Error
|
||||
annotations:
|
||||
summary: "Typhon reports VPD <0.4 kPa for >20m"
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
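Each alert above follows the same three-expression shape: refId A queries the raw series from atlas-vm, refId B reduces the window to a single value with "last", and refId C applies the threshold that drives the alert state. The "or on() vector(0)" tail on most queries pins a zero-valued series in place when the metric is absent, so the threshold still evaluates instead of the rule going NoData; the two min(...) rules omit it, presumably so a missing sensor reads as no data (state OK) rather than as a false low reading. A minimal sketch of the pattern follows; the uid, metric name, and threshold are placeholders, not rules from this file:

      - uid: example-metric-high        # placeholder uid, not a rule in this repo
        title: "Example metric high"
        condition: C
        for: "10m"
        data:
          - refId: A                    # raw query; vector(0) keeps a series present
            datasourceUid: atlas-vm
            model:
              expr: max(example_metric) or on() vector(0)
          - refId: B                    # collapse the window to one number
            datasourceUid: __expr__
            model: { expression: A, reducer: last, type: reduce }
          - refId: C                    # fire when the reduced value exceeds 42
            datasourceUid: __expr__
            model:
              expression: B
              type: threshold
              conditions:
                - evaluator: { params: [42], type: gt }
                  type: query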
File diff suppressed because it is too large
@@ -593,6 +593,44 @@ data:
        }
      },
      "timeFrom": "30d"
    },
    {
      "id": 9,
      "type": "timeseries",
      "title": "Astraios Usage",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
      },
      "gridPos": {
        "h": 9,
        "w": 24,
        "x": 0,
        "y": 44
      },
      "targets": [
        {
          "expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent"
        },
        "overrides": []
      },
      "options": {
        "legend": {
          "displayMode": "table",
          "placement": "right"
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "timeFrom": "30d"
    }
  ],
  "time": {
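The Astraios usage query joins per-instance filesystem usage onto node names: node_filesystem_* series carry only an instance label, while node_uname_info (an info metric whose value is 1) carries the human-readable nodename. label_replace copies nodename into a node label, and multiplying with on(instance) group_left(node) attaches that label to each usage sample without changing its value. A stripped-down sketch of the join, where example_usage is a placeholder standing in for the real filesystem ratio:

    - refId: A
      legendFormat: "{{node}}"
      expr: >-
        avg by (node) (
          avg by (instance) (example_usage)
          * on(instance) group_left(node)
          label_replace(node_uname_info{nodename!=""}, "node", "$1", "nodename", "(.*)")
        )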
@@ -138,7 +138,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -213,7 +213,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -361,7 +361,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -436,7 +436,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-pods dashboard",
          "title": "Open Atlas Pods",
          "url": "/d/atlas-pods",
          "targetBlank": true
        }
@@ -574,7 +574,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -651,7 +651,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -720,7 +720,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -789,7 +789,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -864,7 +864,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -939,7 +939,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -1006,7 +1006,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -1073,7 +1073,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -1290,11 +1290,13 @@ data:
          "fields": "",
          "values": false
        },
        "textMode": "name_and_value"
        "textMode": "name_and_value",
        "orientation": "horizontal",
        "wideLayout": true
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1325,11 +1327,6 @@ data:
          "refId": "B",
          "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
          "legendFormat": "Statera"
        },
        {
          "refId": "C",
          "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
          "legendFormat": "combined"
        }
      ],
      "fieldConfig": {
@@ -1349,7 +1346,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1372,14 +1369,26 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "max(atlas_climate_tent_temperature_celsius) or max(atlas_climate_temperature_celsius) or on() vector(0)",
          "expr": "max(typhon_temperature_celsius) or on() vector(0)",
          "legendFormat": "Tent Temp (\u00b0C)",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "max(atlas_climate_tent_pressure_kpa) or max(atlas_climate_pressure_kpa) or on() vector(0)",
          "legendFormat": "Tent Pressure (kPa)",
          "expr": "max(typhon_vpd_kpa) or on() vector(0)",
          "legendFormat": "Tent VPD (kPa)",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "max(typhon_relative_humidity_percent) or on() vector(0)",
          "legendFormat": "Tent RH (%)",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)",
          "legendFormat": "Dew Point (\u00b0C)",
          "instant": true
        }
      ],
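The refId D expression is the Magnus dew-point approximation written out in PromQL: with temperature T in degrees Celsius, relative humidity RH in percent, and the standard Magnus constants a = 17.62 and b = 243.12 degC, it computes

\gamma(T, \mathit{RH}) = \ln\!\left(\frac{\max(\mathit{RH}, 1)}{100}\right) + \frac{aT}{b + T},
\qquad
T_d = \frac{b\,\gamma}{a - \gamma}

where clamp_min(..., 1) guards the logarithm against a zero humidity reading and the surrounding max(...) keeps the highest dew point across sensors.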
@@ -1424,7 +1433,7 @@ data:
          {
            "matcher": {
              "id": "byName",
              "options": "Tent Pressure (kPa)"
              "options": "Tent VPD (kPa)"
            },
            "properties": [
              {
@@ -1432,6 +1441,30 @@ data:
                "value": "suffix:kPa"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "Tent RH (%)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "percent"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "Dew Point (\u00b0C)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "celsius"
              }
            ]
          }
        ]
      },
@@ -1447,17 +1480,17 @@ data:
          "values": false
        },
        "textMode": "value",
        "orientation": "vertical",
        "wideLayout": false
        "orientation": "horizontal",
        "wideLayout": true
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
      ],
      "description": "Current tent temperature and pressure."
      "description": "Current tent temperature, humidity, VPD, and dew point."
    },
    {
      "id": 43,
@@ -1476,13 +1509,23 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_tent_temperature_celsius or atlas_climate_temperature_celsius)",
          "expr": "typhon_temperature_celsius",
          "legendFormat": "Temperature (\u00b0C)"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_tent_pressure_kpa or atlas_climate_pressure_kpa)",
          "legendFormat": "Pressure (kPa)"
          "expr": "typhon_relative_humidity_percent",
          "legendFormat": "Humidity (%)"
        },
        {
          "refId": "C",
          "expr": "typhon_vpd_kpa",
          "legendFormat": "VPD (kPa)"
        },
        {
          "refId": "D",
          "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))",
          "legendFormat": "Dew Point (\u00b0C)"
        }
      ],
      "fieldConfig": {
@@ -1493,7 +1536,19 @@ data:
          {
            "matcher": {
              "id": "byName",
              "options": "Pressure (kPa)"
              "options": "Humidity (%)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "percent"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "VPD (kPa)"
            },
            "properties": [
              {
@@ -1527,11 +1582,12 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
      ]
      ],
      "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and VPD in kPa (right axis)."
    },
    {
      "id": 140,
@@ -1550,25 +1606,25 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outlet\"}) or max(atlas_climate_fan_activity_level{position=\"outlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))",
          "legendFormat": "Inside Outlet",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"inside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))",
          "legendFormat": "Inside Inlet",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"outside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))",
          "legendFormat": "Outside Inlet",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"interior\"}) or max(atlas_climate_fan_activity_level{position=\"interior\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))",
          "legendFormat": "Interior Fans",
          "instant": true
        }
@@ -1615,11 +1671,13 @@ data:
          "fields": "",
          "values": false
        },
        "textMode": "name_and_value"
        "textMode": "name_and_value",
        "orientation": "horizontal",
        "wideLayout": true
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1642,22 +1700,22 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outlet\"} or atlas_climate_fan_activity_level{position=\"outlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}",
          "legendFormat": "Inside Outlet"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"} or atlas_climate_fan_activity_level{position=\"inside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}",
          "legendFormat": "Inside Inlet"
        },
        {
          "refId": "C",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"} or atlas_climate_fan_activity_level{position=\"outside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}",
          "legendFormat": "Outside Inlet"
        },
        {
          "refId": "D",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"interior\"} or atlas_climate_fan_activity_level{position=\"interior\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"interior\"}",
          "legendFormat": "Interior Fans"
        }
      ],
@@ -1679,7 +1737,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-power dashboard",
          "title": "Open Atlas Power",
          "url": "/d/atlas-power",
          "targetBlank": true
        }
@@ -1750,7 +1808,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1847,7 +1905,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1870,53 +1928,58 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "(100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[1h]))) / clamp_min((sum(increase(ariadne_task_runs_total[1h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ariadne\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ariadne"
        },
        {
          "refId": "B",
          "expr": "(100 * ((sum(increase(metis_builds_total{status=\"ok\"}[1h])) + sum(increase(metis_flashes_total{status=\"ok\"}[1h])))) / clamp_min(((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))), 1)) and on() (((sum(increase(metis_builds_total[1h])) + sum(increase(metis_flashes_total[1h])))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"metis\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"metis\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "metis"
        },
        {
          "refId": "C",
          "expr": "(100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[1h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"ananke\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"ananke\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "ananke"
        },
        {
          "refId": "D",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"atlasbot\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "atlasbot"
        },
        {
          "refId": "E",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"lesavka\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "lesavka"
        },
        {
          "refId": "F",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"pegasus\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "pegasus"
        },
        {
          "refId": "G",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"soteria\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"soteria\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "soteria"
        },
        {
          "refId": "H",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"titan-iac\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "titan-iac"
        },
        {
          "refId": "I",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"bstein-home\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "bstein-home"
        },
        {
          "refId": "J",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0)",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"arcanagon\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "arcanagon"
        },
        {
          "refId": "K",
          "expr": "(100 * (sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\",status=~\"ok|passed|success\"}[1h]))) / clamp_min((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))), 1)) and on() ((sum(increase(platform_quality_gate_runs_total{suite=\"data-prepper\"}[1h]))) > 0) or on() vector(0)",
          "legendFormat": "data-prepper"
        }
      ],
      "fieldConfig": {
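Every suite target above is the same guarded ratio with only the suite label swapped out: clamp_min(..., 1) stops the denominator from dividing by zero, the and on() (... > 0) clause drops the series when the suite recorded no runs in the window, and the or on() vector(0) tail added in this change fills that gap with an explicit 0 so the legend entry never disappears from the panel. A generic sketch of one target, with <suite> as a placeholder label value:

    - refId: X                           # placeholder refId
      legendFormat: "<suite>"
      expr: >-
        (100 * sum(increase(platform_quality_gate_runs_total{suite="<suite>",status=~"ok|passed|success"}[1h]))
             / clamp_min(sum(increase(platform_quality_gate_runs_total{suite="<suite>"}[1h])), 1))
        and on() (sum(increase(platform_quality_gate_runs_total{suite="<suite>"}[1h])) > 0)
        or on() vector(0)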
@@ -1950,7 +2013,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "title": "Open Atlas Testing",
          "url": "/d/atlas-jobs",
          "targetBlank": true
        }
@@ -1961,7 +2024,7 @@ data:
    {
      "id": 47,
      "type": "bargauge",
      "title": "Platform Suite Pass Rate (24h)",
      "title": "PVC Backup Health / Age",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@@ -1974,31 +2037,35 @@ data:
      },
      "targets": [
        {
          "expr": "sort_desc((label_replace((100 * (sum(increase(ariadne_task_runs_total{status=\"ok\"}[24h]))) / clamp_min((sum(increase(ariadne_task_runs_total[24h]))), 1)) and on() ((sum(increase(ariadne_task_runs_total[24h]))) > 0), \"suite\", \"ariadne\", \"__name__\", \".*\") or label_replace((100 * ((sum(increase(metis_builds_total{status=\"ok\"}[24h])) + sum(increase(metis_flashes_total{status=\"ok\"}[24h])))) / clamp_min(((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))), 1)) and on() (((sum(increase(metis_builds_total[24h])) + sum(increase(metis_flashes_total[24h])))) > 0), \"suite\", \"metis\", \"__name__\", \".*\") or label_replace((100 * (sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\",status=\"ok\"}[24h]))) / clamp_min((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))), 1)) and on() ((sum(increase(ananke_quality_gate_runs_total{suite=\"ananke\"}[24h]))) > 0), \"suite\", \"ananke\", \"__name__\", \".*\")) or ((100 * (sum by (suite) (increase(platform_quality_gate_runs_total{status=~\"ok|passed|success\"}[24h]))) / clamp_min((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))), 1)) and on(suite) ((sum by (suite) (increase(platform_quality_gate_runs_total[24h]))) > 0)))",
          "expr": "sort_desc(max by (namespace, pvc) (pvc_backup_age_hours or on(namespace, pvc) ((1 - pvc_backup_health) * 999)))",
          "refId": "A",
          "legendFormat": "{{suite}}",
          "legendFormat": "{{namespace}}/{{pvc}}",
          "instant": true
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "unit": "h",
          "min": 0,
          "max": 100,
          "max": null,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "red",
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 80
                "value": 20
              },
              {
                "color": "green",
                "value": 95
                "color": "orange",
                "value": 40
              },
              {
                "color": "red",
                "value": 50
              }
            ]
          }
@@ -2029,12 +2096,12 @@ data:
      ],
      "links": [
        {
          "title": "Open atlas-jobs dashboard",
          "url": "/d/atlas-jobs",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
      ],
      "description": "24-hour per-suite pass-rate snapshot. This complements the 7-day trend by showing each suite's current quality posture."
      "description": "Oldest successful backup age in hours by PVC with nightly cadence thresholds (green <=20h, yellow <40h, orange <50h, red >=50h). PVCs with missing or unhealthy backup state are forced to 999h so critical bars stay visible."
    },
    {
      "id": 30,
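The replacement expression leans on PromQL's or fallback: pvc_backup_age_hours wins for any (namespace, pvc) pair where it exists, and where it is missing, (1 - pvc_backup_health) * 999 substitutes a 999h sentinel when the health gauge reads 0 (and 0h when it reads 1), which appears to be what the description means by forcing missing or unhealthy PVCs to 999h so they sort to the top of the bar gauge. A reduced sketch of the fallback shape, using the metric names from the panel:

    - refId: A
      instant: true
      legendFormat: "{{namespace}}/{{pvc}}"
      expr: >-
        sort_desc(max by (namespace, pvc) (
          pvc_backup_age_hours
          or on(namespace, pvc) ((1 - pvc_backup_health) * 999)
        ))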
@@ -2097,7 +2164,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2202,7 +2269,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2278,7 +2345,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2354,7 +2421,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-mail dashboard",
          "title": "Open Atlas Mail",
          "url": "/d/atlas-mail",
          "targetBlank": true
        }
@@ -2734,7 +2801,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -2781,7 +2848,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-nodes dashboard",
          "title": "Open Atlas Nodes",
          "url": "/d/atlas-nodes",
          "targetBlank": true
        }
@@ -3031,7 +3098,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3075,7 +3142,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3119,7 +3186,7 @@ data:
      },
      "links": [
        {
          "title": "Open atlas-network dashboard",
          "title": "Open Atlas Network",
          "url": "/d/atlas-network",
          "targetBlank": true
        }
@@ -3167,7 +3234,7 @@ data:
      "timeFrom": "30d",
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
@@ -3175,8 +3242,8 @@ data:
    },
    {
      "id": 22,
      "type": "bargauge",
      "title": "Nodes Closest to Full Root Disks",
      "type": "timeseries",
      "title": "Nodes Closest to Full Astraios Disks",
      "datasource": {
        "type": "prometheus",
        "uid": "atlas-vm"
@@ -3189,68 +3256,36 @@ data:
      },
      "targets": [
        {
          "expr": "sort_desc(topk(12, avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))))",
          "expr": "avg by (node) ((avg by (instance) ((1 - (node_filesystem_avail_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"} / node_filesystem_size_bytes{mountpoint=\"/mnt/astraios\",fstype!~\"tmpfs|overlay\"})) * 100)) * on(instance) group_left(node) label_replace(node_uname_info{nodename!=\"\"}, \"node\", \"$1\", \"nodename\", \"(.*)\"))",
          "refId": "A",
          "legendFormat": "{{node}}"
        }
      ],
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "min": 0,
          "max": 100,
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              },
              {
                "color": "yellow",
                "value": 50
              },
              {
                "color": "orange",
                "value": 75
              },
              {
                "color": "red",
                "value": 91.5
              }
            ]
          }
          "unit": "percent"
        },
        "overrides": []
      },
      "options": {
        "displayMode": "gradient",
        "orientation": "horizontal",
        "reduceOptions": {
        "legend": {
          "displayMode": "table",
          "placement": "right",
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
            "last"
          ]
        },
        "tooltip": {
          "mode": "multi"
        }
      },
      "timeFrom": "1w",
      "links": [
        {
          "title": "Open atlas-storage dashboard",
          "title": "Open Atlas Storage",
          "url": "/d/atlas-storage",
          "targetBlank": true
        }
      ],
      "transformations": [
        {
          "id": "sortBy",
          "options": {
            "fields": [
              "Value"
            ],
            "order": "desc"
          }
        }
      ]
    }
  ],
@@ -3371,5 +3406,11 @@ data:
    "to": "now"
  },
  "refresh": "1m",
  "links": []
  "links": [
    {
      "title": "Atlas Testing (Internal)",
      "url": "/d/atlas-jobs",
      "targetBlank": false
    }
  ]
}
@@ -224,7 +224,9 @@ data:
          "fields": "",
          "values": false
        },
        "textMode": "name_and_value"
        "textMode": "name_and_value",
        "orientation": "horizontal",
        "wideLayout": true
      },
      "description": "Per-UPS live snapshot: current draw in watts, estimated battery runtime if discharge started now, and charging/discharging status."
    },
@@ -252,11 +254,6 @@ data:
          "refId": "B",
          "expr": "((ananke_ups_load_percent{job=\"ananke-power\",source=\"Statera\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\",source=\"Statera\"}) / 100)",
          "legendFormat": "Statera"
        },
        {
          "refId": "C",
          "expr": "sum((ananke_ups_load_percent{job=\"ananke-power\"} * ananke_ups_power_nominal_watts{job=\"ananke-power\"}) / 100)",
          "legendFormat": "combined"
        }
      ],
      "fieldConfig": {
@@ -274,7 +271,7 @@ data:
          "mode": "multi"
        }
      },
      "description": "Historical UPS power consumption in watts for titan-db, tethys, and combined load."
      "description": "Historical UPS power consumption in watts for titan-db and tethys."
    },
    {
      "id": 3,
@@ -293,14 +290,26 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "max(atlas_climate_tent_temperature_celsius) or max(atlas_climate_temperature_celsius) or on() vector(0)",
          "expr": "max(typhon_temperature_celsius) or on() vector(0)",
          "legendFormat": "Tent Temp (\u00b0C)",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "max(atlas_climate_tent_pressure_kpa) or max(atlas_climate_pressure_kpa) or on() vector(0)",
          "legendFormat": "Tent Pressure (kPa)",
          "expr": "max(typhon_vpd_kpa) or on() vector(0)",
          "legendFormat": "Tent VPD (kPa)",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "max(typhon_relative_humidity_percent) or on() vector(0)",
          "legendFormat": "Tent RH (%)",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "max((243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))) or on() vector(0)",
          "legendFormat": "Dew Point (\u00b0C)",
          "instant": true
        }
      ],
@@ -345,12 +354,36 @@ data:
          {
            "matcher": {
              "id": "byName",
              "options": "Tent Pressure (kPa)"
              "options": "Tent VPD (kPa)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "none"
                "value": "suffix:kPa"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "Tent RH (%)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "percent"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "Dew Point (\u00b0C)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "celsius"
              }
            ]
          }
@@ -367,9 +400,11 @@ data:
          "fields": "",
          "values": false
        },
        "textMode": "name_and_value"
        "textMode": "name_and_value",
        "orientation": "horizontal",
        "wideLayout": true
      },
      "description": "Current tent temperature and air pressure. These render once climate telemetry is online."
      "description": "Current tent temperature, humidity, VPD, and dew point. These render once Typhon climate telemetry is online."
    },
    {
      "id": 4,
@@ -388,13 +423,23 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_tent_temperature_celsius or atlas_climate_temperature_celsius)",
          "expr": "typhon_temperature_celsius",
          "legendFormat": "Temperature (\u00b0C)"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_tent_pressure_kpa or atlas_climate_pressure_kpa)",
          "legendFormat": "Pressure (kPa)"
          "expr": "typhon_relative_humidity_percent",
          "legendFormat": "Humidity (%)"
        },
        {
          "refId": "C",
          "expr": "typhon_vpd_kpa",
          "legendFormat": "VPD (kPa)"
        },
        {
          "refId": "D",
          "expr": "(243.12 * (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius))) / (17.62 - (ln(clamp_min(typhon_relative_humidity_percent, 1) / 100) + (17.62 * typhon_temperature_celsius) / (243.12 + typhon_temperature_celsius)))",
          "legendFormat": "Dew Point (\u00b0C)"
        }
      ],
      "fieldConfig": {
@@ -405,7 +450,19 @@ data:
          {
            "matcher": {
              "id": "byName",
              "options": "Pressure (kPa)"
              "options": "Humidity (%)"
            },
            "properties": [
              {
                "id": "unit",
                "value": "percent"
              }
            ]
          },
          {
            "matcher": {
              "id": "byName",
              "options": "VPD (kPa)"
            },
            "properties": [
              {
@@ -437,7 +494,7 @@ data:
          "mode": "multi"
        }
      },
      "description": "Two-axis chart: tent temperature (left axis) and tent pressure in kPa (right axis)."
      "description": "Two-axis chart: tent temperature/humidity/dew point (left axis) and tent VPD in kPa (right axis)."
    },
    {
      "id": 5,
@@ -456,25 +513,25 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outlet\"}) or max(atlas_climate_fan_activity_level{position=\"outlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outlet\"}) or on() vector(0))",
          "legendFormat": "Inside Outlet",
          "instant": true
        },
        {
          "refId": "B",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"inside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"inside_inlet\"}) or on() vector(0))",
          "legendFormat": "Inside Inlet",
          "instant": true
        },
        {
          "refId": "C",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"}) or max(atlas_climate_fan_activity_level{position=\"outside_inlet\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"outside_inlet\"}) or on() vector(0))",
          "legendFormat": "Outside Inlet",
          "instant": true
        },
        {
          "refId": "D",
          "expr": "round(max(atlas_climate_fan_activity_level{fan_group=\"interior\"}) or max(atlas_climate_fan_activity_level{position=\"interior\"}) or on() vector(0))",
          "expr": "round(max(typhon_fan_speed_level{fan_group=\"interior\"}) or on() vector(0))",
          "legendFormat": "Interior Fans",
          "instant": true
        }
@@ -521,7 +578,9 @@ data:
          "fields": "",
          "values": false
        },
        "textMode": "name_and_value"
        "textMode": "name_and_value",
        "orientation": "horizontal",
        "wideLayout": true
      },
      "description": "Current fan activity levels (0-10): inside outlet, inside inlet, outside inlet, and interior fans."
    },
@@ -542,22 +601,22 @@ data:
      "targets": [
        {
          "refId": "A",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outlet\"} or atlas_climate_fan_activity_level{position=\"outlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outlet\"}",
          "legendFormat": "Inside Outlet"
        },
        {
          "refId": "B",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"inside_inlet\"} or atlas_climate_fan_activity_level{position=\"inside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"inside_inlet\"}",
          "legendFormat": "Inside Inlet"
        },
        {
          "refId": "C",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"outside_inlet\"} or atlas_climate_fan_activity_level{position=\"outside_inlet\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"outside_inlet\"}",
          "legendFormat": "Outside Inlet"
        },
        {
          "refId": "D",
          "expr": "(atlas_climate_fan_activity_level{fan_group=\"interior\"} or atlas_climate_fan_activity_level{position=\"interior\"})",
          "expr": "typhon_fan_speed_level{fan_group=\"interior\"}",
          "legendFormat": "Interior Fans"
        }
      ],
@@ -23,7 +23,6 @@ resources:
  - platform-quality-gateway-pvc.yaml
  - platform-quality-gateway-service.yaml
  - platform-quality-gateway-deployment.yaml
  - platform-quality-suite-probe-cronjob.yaml
  - vault-sync-deployment.yaml
  - grafana-alerting-config.yaml
  - grafana-folders.yaml
@@ -1,39 +0,0 @@
# services/monitoring/platform-quality-suite-probe-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: platform-quality-suite-probe
  namespace: monitoring
spec:
  schedule: "*/15 * * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 2
  failedJobsHistoryLimit: 2
  jobTemplate:
    spec:
      backoffLimit: 0
      template:
        metadata:
          labels:
            app: platform-quality-suite-probe
        spec:
          restartPolicy: Never
          containers:
            - name: probe
              image: curlimages/curl:8.12.1
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "/scripts/platform_quality_suite_probe.sh"]
              env:
                - name: PUSHGATEWAY_URL
                  value: http://platform-quality-gateway.monitoring.svc.cluster.local:9091
                - name: HTTP_TIMEOUT_SECONDS
                  value: "12"
              volumeMounts:
                - name: probe-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: probe-script
              configMap:
                name: platform-quality-suite-probe-script
                defaultMode: 0555
@@ -1,113 +0,0 @@
# services/nextcloud-mail-sync/cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nextcloud-mail-sync
  namespace: nextcloud
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "0 5 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1
  jobTemplate:
    spec:
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "nextcloud"
            vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db"
            vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: |
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }}
              export POSTGRES_DB="{{ .Data.data.database }}"
              export POSTGRES_USER="{{ index .Data.data "db-username" }}"
              export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }}
              export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}"
              export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}"
              {{ end }}
              export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}"
              export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}"
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }}
              export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}"
              export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/postmark-relay" }}
              export SMTP_NAME="{{ index .Data.data "apikey" }}"
              export SMTP_PASSWORD="{{ index .Data.data "apikey" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
              export KC_ADMIN_USER="{{ .Data.data.username }}"
              export KC_ADMIN_PASS="{{ .Data.data.password }}"
              {{ end }}
        spec:
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          restartPolicy: OnFailure
          securityContext:
            runAsUser: 0
            runAsGroup: 0
          serviceAccountName: nextcloud-vault
          containers:
            - name: mail-sync
              image: nextcloud:29-apache
              imagePullPolicy: IfNotPresent
              command:
                - /bin/sh
                - -c
              env:
                - name: KC_BASE
                  value: http://keycloak.sso.svc.cluster.local
                - name: KC_REALM
                  value: atlas
                - name: MAILU_DOMAIN
                  value: bstein.dev
                - name: POSTGRES_HOST
                  value: postgres-service.postgres.svc.cluster.local
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
                limits:
                  cpu: 500m
                  memory: 512Mi
              volumeMounts:
                - name: nextcloud-web
                  mountPath: /var/www/html
                - name: nextcloud-config-pvc
                  mountPath: /var/www/html/config
                - name: nextcloud-custom-apps
                  mountPath: /var/www/html/custom_apps
                - name: nextcloud-user-data
                  mountPath: /var/www/html/data
                - name: sync-script
                  mountPath: /sync/sync.sh
                  subPath: sync.sh
              args:
                - |
                  set -eu
                  . /vault/secrets/nextcloud-env.sh
                  exec /sync/sync.sh
          volumes:
            - name: nextcloud-config-pvc
              persistentVolumeClaim:
                claimName: nextcloud-config-v2
            - name: nextcloud-custom-apps
              persistentVolumeClaim:
                claimName: nextcloud-custom-apps-v2
            - name: nextcloud-user-data
              persistentVolumeClaim:
                claimName: nextcloud-user-data-v2
            - name: nextcloud-web
              persistentVolumeClaim:
                claimName: nextcloud-web-v2
            - name: sync-script
              configMap:
                name: nextcloud-mail-sync-script
                defaultMode: 0755
@@ -3,11 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: nextcloud
resources:
  - cronjob.yaml
  - portal-rbac.yaml
configMapGenerator:
  - name: nextcloud-mail-sync-script
    files:
      - sync.sh=scripts/nextcloud-mail-sync.sh
    options:
      disableNameSuffixHash: true
@ -1,235 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
KC_BASE="${KC_BASE:?}"
|
||||
KC_REALM="${KC_REALM:?}"
|
||||
KC_ADMIN_USER="${KC_ADMIN_USER:?}"
|
||||
KC_ADMIN_PASS="${KC_ADMIN_PASS:?}"
|
||||
MAILU_DOMAIN="${MAILU_DOMAIN:?}"
|
||||
ONLY_USERNAME="${ONLY_USERNAME:-}"
|
||||
POSTGRES_HOST="${POSTGRES_HOST:-}"
|
||||
POSTGRES_DB="${POSTGRES_DB:-}"
|
||||
POSTGRES_USER="${POSTGRES_USER:-}"
|
||||
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-}"
|
||||
|
||||
if ! command -v jq >/dev/null 2>&1; then
|
||||
apt-get update && apt-get install -y jq curl >/dev/null
|
||||
fi
|
||||
|
||||
ensure_psql() {
|
||||
if command -v psql >/dev/null 2>&1; then
|
||||
return 0
|
||||
fi
|
||||
apt-get update && apt-get install -y postgresql-client >/dev/null
|
||||
}
|
||||
|
||||
set_editor_mode_richtext() {
|
||||
local ids=("$@")
|
||||
|
||||
if [[ ${#ids[@]} -eq 0 ]]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
if [[ -z "${POSTGRES_HOST}" || -z "${POSTGRES_DB}" || -z "${POSTGRES_USER}" || -z "${POSTGRES_PASSWORD}" ]]; then
|
||||
echo "WARN: missing postgres env; cannot update mail editor_mode" >&2
|
||||
return 0
|
||||
fi
|
||||
|
||||
ensure_psql
|
||||
|
||||
local ids_csv
|
||||
ids_csv=$(IFS=,; echo "${ids[*]}")
|
||||
|
||||
PGPASSWORD="${POSTGRES_PASSWORD}" psql \
|
||||
-h "${POSTGRES_HOST}" \
|
||||
-U "${POSTGRES_USER}" \
|
||||
-d "${POSTGRES_DB}" \
|
||||
-v ON_ERROR_STOP=1 \
|
||||
-c "UPDATE oc_mail_accounts SET editor_mode='richtext' WHERE id IN (${ids_csv}) AND editor_mode <> 'richtext';" \
|
||||
>/dev/null
|
||||
}
|
||||
|
||||
list_mail_accounts() {
|
||||
local user_id="${1}"
|
||||
local export_out
|
||||
|
||||
# Nextcloud Mail does not provide a list command; export is safe (does not print passwords).
|
||||
if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}"); then
|
||||
echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
awk -v OFS='\t' '
|
||||
BEGIN { IGNORECASE=1; id="" }
|
||||
$1 == "Account" { id=$2; sub(":", "", id); next }
|
||||
$1 == "-" && tolower($2) ~ /^e-?mail:$/ { if (id) print id, $3 }
|
||||
' <<<"${export_out}" | sort -u
|
||||
}
|
||||
|
||||
token=$(
|
||||
curl -fsS \
|
||||
--data-urlencode "grant_type=password" \
|
||||
--data-urlencode "client_id=admin-cli" \
|
||||
--data-urlencode "username=${KC_ADMIN_USER}" \
|
||||
--data-urlencode "password=${KC_ADMIN_PASS}" \
|
||||
"${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token // empty'
|
||||
)
|
||||
|
||||
if [[ -z "${token}" || "${token}" == "null" ]]; then
|
||||
echo "Failed to obtain admin token"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd /var/www/html
|
||||
|
||||
kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000&briefRepresentation=false"
|
||||
if [[ -n "${ONLY_USERNAME}" ]]; then
|
||||
username_q=$(jq -nr --arg v "${ONLY_USERNAME}" '$v|@uri')
|
||||
kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1&briefRepresentation=false"
|
||||
fi
|
||||
|
||||
users=$(curl -fsS -H "Authorization: Bearer ${token}" "${kc_users_url}")
|
||||
if ! jq -e 'type == "array"' >/dev/null 2>&1 <<<"${users}"; then
|
||||
echo "ERROR: Keycloak user list is not an array; aborting sync" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
kc_set_user_mail_meta() {
|
||||
local user_id="${1}"
|
||||
local primary_email="${2}"
|
||||
local mailu_account_count="${3}"
|
||||
local synced_at="${4}"
|
||||
|
||||
# Fetch the full user representation so we don't accidentally clobber attributes.
|
||||
local user_json updated_json
|
||||
if ! user_json=$(curl -fsS -H "Authorization: Bearer ${token}" \
|
||||
"${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}"); then
|
||||
echo "WARN: unable to fetch Keycloak user ${user_id} for metadata writeback" >&2
|
||||
return 1
|
||||
fi
|
||||
|
||||
updated_json=$(
|
||||
jq -c \
|
||||
--arg primary_email "${primary_email}" \
|
||||
--arg mailu_account_count "${mailu_account_count}" \
|
||||
--arg synced_at "${synced_at}" \
|
||||
'
|
||||
.attributes = (.attributes // {}) |
|
||||
.attributes.nextcloud_mail_primary_email = [$primary_email] |
|
||||
.attributes.nextcloud_mail_account_count = [$mailu_account_count] |
|
||||
.attributes.nextcloud_mail_synced_at = [$synced_at] |
|
||||
del(.access)
|
||||
' <<<"${user_json}"
|
||||
)
|
||||
|
||||
curl -fsS -X PUT \
|
||||
-H "Authorization: Bearer ${token}" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "${updated_json}" \
|
||||
"${KC_BASE}/admin/realms/${KC_REALM}/users/${user_id}" >/dev/null
|
||||
}
|
while read -r user; do
  user_id=$(jq -r '.id' <<<"${user}")
  username=$(jq -r '.username' <<<"${user}")
  keycloak_email=$(echo "${user}" | jq -r '.email // empty')
  mailu_email=$(echo "${user}" | jq -r '(.attributes.mailu_email[0] // .attributes.mailu_email // empty)')
  app_pw=$(echo "${user}" | jq -r '(.attributes.mailu_app_password[0] // .attributes.mailu_app_password // empty)')

  if [[ -z "${mailu_email}" ]]; then
    if [[ -n "${keycloak_email}" && "${keycloak_email,,}" == *"@${MAILU_DOMAIN,,}" ]]; then
      mailu_email="${keycloak_email}"
    else
      mailu_email="${username}@${MAILU_DOMAIN}"
    fi
  fi

  [[ -z "${mailu_email}" || -z "${app_pw}" ]] && continue

  if ! accounts=$(list_mail_accounts "${username}"); then
    continue
  fi

  # Manage only internal Mailu-domain accounts; leave any external accounts untouched.
  mailu_accounts=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts}" || true)

  desired_email="${mailu_email}"
  primary_id=""
  primary_email=""

  if [[ -n "${mailu_accounts}" ]]; then
    while IFS=$'\t' read -r account_id account_email; do
      if [[ -z "${primary_id}" ]]; then
        primary_id="${account_id}"
        primary_email="${account_email}"
      fi
      if [[ "${account_email,,}" == "${desired_email,,}" ]]; then
        primary_id="${account_id}"
        primary_email="${account_email}"
        break
      fi
    done <<<"${mailu_accounts}"

    echo "Updating ${username} mail account ${primary_id} (${primary_email})"
    /usr/sbin/runuser -u www-data -- php occ mail:account:update -q "${primary_id}" \
      --name "${username}" \
      --email "${desired_email}" \
      --imap-host mail.bstein.dev \
      --imap-port 993 \
      --imap-ssl-mode ssl \
      --imap-user "${desired_email}" \
      --imap-password "${app_pw}" \
      --smtp-host mail.bstein.dev \
      --smtp-port 587 \
      --smtp-ssl-mode tls \
      --smtp-user "${desired_email}" \
      --smtp-password "${app_pw}" \
      --auth-method password >/dev/null 2>&1 || true

    # Remove any extra Mailu-domain accounts for this user to prevent duplicates.
    while IFS=$'\t' read -r account_id account_email; do
      if [[ "${account_id}" == "${primary_id}" ]]; then
        continue
      fi
      echo "Deleting extra mail account ${account_id} (${account_email})"
      /usr/sbin/runuser -u www-data -- php occ mail:account:delete -q "${account_id}" >/dev/null 2>&1 || true
    done <<<"${mailu_accounts}"
  else
    echo "Creating mail account for ${username} (${desired_email})"
    /usr/sbin/runuser -u www-data -- php occ mail:account:create -q \
      "${username}" "${username}" "${desired_email}" \
      mail.bstein.dev 993 ssl "${desired_email}" "${app_pw}" \
      mail.bstein.dev 587 tls "${desired_email}" "${app_pw}" password >/dev/null 2>&1 || true
  fi

  # Write non-secret metadata back to Keycloak for UI introspection and onboarding gating.
  synced_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  if accounts_after=$(list_mail_accounts "${username}"); then
    mailu_accounts_after=$(awk -v d="${MAILU_DOMAIN,,}" 'tolower($2) ~ ("@" d "$") {print}' <<<"${accounts_after}" || true)
    if [[ -n "${mailu_accounts_after}" ]]; then
      mailu_account_count=$(printf '%s\n' "${mailu_accounts_after}" | wc -l | tr -d ' ')
    else
      mailu_account_count="0"
    fi
    primary_email_after=""
    editor_mode_ids=()
    if [[ -n "${mailu_accounts_after}" ]]; then
      while IFS=$'\t' read -r _account_id account_email; do
        editor_mode_ids+=("${_account_id}")
        if [[ "${account_email,,}" == "${desired_email,,}" ]]; then
          primary_email_after="${account_email}"
          break
        fi
        if [[ -z "${primary_email_after}" ]]; then
          primary_email_after="${account_email}"
        fi
      done <<<"${mailu_accounts_after}"
    fi
    set_editor_mode_richtext "${editor_mode_ids[@]}"
  else
    mailu_account_count="0"
    primary_email_after=""
  fi

  kc_set_user_mail_meta "${user_id}" "${primary_email_after}" "${mailu_account_count}" "${synced_at}" || true
done < <(jq -c '.[]' <<<"${users}")
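The selection rule in the loop above is: prefer the internal account whose address matches the desired Mailu address (case-insensitively), otherwise fall back to the first internal account. A minimal Python sketch of that rule, for illustration only (the accounts list of (id, email) pairs is hypothetical; the repo implements this in shell):

def pick_primary(accounts, desired_email):
    """Exact (case-insensitive) address match wins; otherwise the first internal account."""
    primary = None
    for account_id, email in accounts:
        if primary is None:
            primary = (account_id, email)  # fallback: first internal account seen
        if email.lower() == desired_email.lower():
            return (account_id, email)  # a match short-circuits, like the shell `break`
    return primary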
@ -18,6 +18,7 @@ spec:
    spec:
      nodeSelector:
        hardware: rpi5
        node-role.kubernetes.io/worker: "true"
      containers:
        - name: collabora
          image: collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26
@ -1,48 +0,0 @@
# services/nextcloud/cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nextcloud-cron
  namespace: nextcloud
spec:
  schedule: "*/5 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        spec:
          securityContext:
            runAsUser: 33
            runAsGroup: 33
            fsGroup: 33
          restartPolicy: OnFailure
          containers:
            - name: nextcloud-cron
              image: nextcloud:29-apache
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "-c"]
              args:
                - "cd /var/www/html && php -f cron.php"
              volumeMounts:
                - name: nextcloud-web
                  mountPath: /var/www/html
                - name: nextcloud-config-pvc
                  mountPath: /var/www/html/config
                - name: nextcloud-custom-apps
                  mountPath: /var/www/html/custom_apps
                - name: nextcloud-user-data
                  mountPath: /var/www/html/data
          volumes:
            - name: nextcloud-config-pvc
              persistentVolumeClaim:
                claimName: nextcloud-config-v2
            - name: nextcloud-custom-apps
              persistentVolumeClaim:
                claimName: nextcloud-custom-apps-v2
            - name: nextcloud-user-data
              persistentVolumeClaim:
                claimName: nextcloud-user-data-v2
            - name: nextcloud-web
              persistentVolumeClaim:
                claimName: nextcloud-web-v2
@ -9,13 +9,5 @@ resources:
  - pvc.yaml
  - deployment.yaml
  - collabora.yaml
  - cronjob.yaml
  - maintenance-cronjob.yaml
  - service.yaml
  - ingress.yaml
configMapGenerator:
  - name: nextcloud-maintenance-script
    files:
      - maintenance.sh=scripts/nextcloud-maintenance.sh
    options:
      disableNameSuffixHash: true
@ -1,98 +0,0 @@
# services/nextcloud/maintenance-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: nextcloud-maintenance
  namespace: nextcloud
spec:
  schedule: "30 4 * * *"
  suspend: true
  concurrencyPolicy: Forbid
  jobTemplate:
    spec:
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "nextcloud"
            vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db"
            vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: |
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }}
              export POSTGRES_DB="{{ .Data.data.database }}"
              export POSTGRES_USER="{{ index .Data.data "db-username" }}"
              export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }}
              export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}"
              export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}"
              {{ end }}
              export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}"
              export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}"
              {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }}
              export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}"
              export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/postmark-relay" }}
              export SMTP_NAME="{{ index .Data.data "apikey" }}"
              export SMTP_PASSWORD="{{ index .Data.data "apikey" }}"
              {{ end }}
              {{ with secret "kv/data/atlas/shared/keycloak-admin" }}
              export KC_ADMIN_USER="{{ .Data.data.username }}"
              export KC_ADMIN_PASS="{{ .Data.data.password }}"
              {{ end }}
        spec:
          restartPolicy: OnFailure
          securityContext:
            runAsUser: 0
            runAsGroup: 0
          serviceAccountName: nextcloud-vault
          containers:
            - name: maintenance
              image: nextcloud:29-apache
              imagePullPolicy: IfNotPresent
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -eu
                  . /vault/secrets/nextcloud-env.sh
                  exec /maintenance/maintenance.sh
              env:
                - name: NC_URL
                  value: https://cloud.bstein.dev
              volumeMounts:
                - name: nextcloud-web
                  mountPath: /var/www/html
                - name: nextcloud-config-pvc
                  mountPath: /var/www/html/config
                - name: nextcloud-custom-apps
                  mountPath: /var/www/html/custom_apps
                - name: nextcloud-user-data
                  mountPath: /var/www/html/data
                - name: maintenance-script
                  mountPath: /maintenance/maintenance.sh
                  subPath: maintenance.sh
              resources:
                requests:
                  cpu: 100m
                  memory: 256Mi
                limits:
                  cpu: 500m
                  memory: 512Mi
          volumes:
            - name: nextcloud-config-pvc
              persistentVolumeClaim:
                claimName: nextcloud-config-v2
            - name: nextcloud-custom-apps
              persistentVolumeClaim:
                claimName: nextcloud-custom-apps-v2
            - name: nextcloud-user-data
              persistentVolumeClaim:
                claimName: nextcloud-user-data-v2
            - name: nextcloud-web
              persistentVolumeClaim:
                claimName: nextcloud-web-v2
            - name: maintenance-script
              configMap:
                name: nextcloud-maintenance-script
                defaultMode: 0755
@ -1,108 +0,0 @@
#!/bin/bash
set -euo pipefail

NC_URL="${NC_URL:-https://cloud.bstein.dev}"
ADMIN_USER="${ADMIN_USER:?}"
ADMIN_PASS="${ADMIN_PASS:?}"

export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y -qq curl jq >/dev/null

run_occ() {
  runuser -u www-data -- php /var/www/html/occ "$@"
}

log() { echo "[$(date -Is)] $*"; }

log "Ensuring Nextcloud app files are present"
if [[ ! -d /var/www/html/lib && -d /usr/src/nextcloud/lib ]]; then
  rsync -a --delete \
    --exclude config \
    --exclude data \
    /usr/src/nextcloud/ /var/www/html/
fi

log "Ensuring Nextcloud permissions"
mkdir -p /var/www/html/data
chown 33:33 /var/www/html || true
chmod 775 /var/www/html || true
chown -R 33:33 /var/www/html/apps /var/www/html/custom_apps /var/www/html/data /var/www/html/config 2>/dev/null || true

log "Applying Atlas theming"
run_occ config:app:set theming name --value "Atlas Cloud"
run_occ config:app:set theming slogan --value "Unified access to Atlas services"
run_occ config:app:set theming url --value "https://cloud.bstein.dev"
run_occ config:app:set theming color --value "#0f172a"
run_occ config:app:set theming disable-user-theming --value "yes"

log "Applying Atlas Mail styling defaults"
run_occ app:install customcss >/dev/null 2>&1 || true
run_occ app:enable customcss >/dev/null 2>&1 || true
MAIL_CSS=$(cat <<'CSS'
.mail-message-body, .mail-message-body pre, .mail-message-body code, .mail-message-body table {
  font-family: "Inter", "Source Sans 3", "Helvetica Neue", Arial, sans-serif;
  font-size: 14px;
  line-height: 1.6;
  color: var(--color-main-text);
}
.mail-message-body pre {
  background: rgba(15, 23, 42, 0.06);
  padding: 12px;
  border-radius: 8px;
}
.mail-message-body blockquote {
  border-left: 3px solid var(--color-border);
  padding-left: 12px;
  margin: 8px 0;
  color: var(--color-text-lighter);
}
.mail-message-body img {
  max-width: 100%;
  border-radius: 6px;
}
CSS
)
run_occ config:app:set customcss css --value "${MAIL_CSS}" >/dev/null

log "Setting default quota to 250 GB"
run_occ config:app:set files default_quota --value "250 GB"

API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1"
AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true")

log "Removing existing external links"
existing=$(curl -sf "${AUTH[@]}" "${API_BASE}?format=json" | jq -r '.ocs.data[].id // empty')
for id in ${existing}; do
  curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null || true
done

SITES=(
  "Vaultwarden|https://vault.bstein.dev"
  "Jellyfin|https://stream.bstein.dev"
  "Gitea|https://scm.bstein.dev"
  "Jenkins|https://ci.bstein.dev"
  "Harbor|https://registry.bstein.dev"
  "Vault|https://secret.bstein.dev"
  "Jitsi|https://meet.bstein.dev"
  "Grafana|https://metrics.bstein.dev"
  "Chat LLM|https://chat.ai.bstein.dev"
  "Vision|https://draw.ai.bstein.dev"
  "STT/TTS|https://talk.ai.bstein.dev"
)

log "Seeding external links"
for entry in "${SITES[@]}"; do
  IFS="|" read -r name url <<<"${entry}"
  curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \
    -d "name=${name}" \
    -d "url=${url}" \
    -d "lang=" \
    -d "type=link" \
    -d "device=" \
    -d "icon=" \
    -d "groups[]=" \
    -d "redirect=1" >/dev/null
done

log "Maintenance run completed"
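The external-links seeding in the removed script above drives the Nextcloud external-sites OCS endpoint. For reference, an equivalent single-site call in Python (a sketch mirroring the curl flags above; the requests dependency and the placeholder credentials are assumptions, not from the repo):

import requests

resp = requests.post(
    "https://cloud.bstein.dev/ocs/v2.php/apps/external/api/v1/sites?format=json",
    auth=("admin", "secret"),  # stand-ins for ADMIN_USER / ADMIN_PASS
    headers={"OCS-APIRequest": "true"},
    data={
        "name": "Gitea", "url": "https://scm.bstein.dev",
        "lang": "", "type": "link", "device": "", "icon": "",
        "groups[]": "", "redirect": "1",
    },
    timeout=10,
)
resp.raise_for_status()  # plays the role of `curl -sf` under `set -euo pipefail`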
@ -9,3 +9,4 @@ spec:
    spec:
      nodeSelector:
        hardware: rpi5
        node-role.kubernetes.io/worker: "true"
155 services/typhon/deployment.yaml Normal file
@ -0,0 +1,155 @@
# services/typhon/deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: typhon
  namespace: climate
  labels:
    app: typhon
spec:
  replicas: 1
  revisionHistoryLimit: 3
  selector:
    matchLabels:
      app: typhon
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 0
      maxUnavailable: 1
  template:
    metadata:
      labels:
        app: typhon
      annotations:
        prometheus.io/scrape: "true"
        prometheus.io/port: "9108"
        prometheus.io/path: "/metrics"
        vault.hashicorp.com/agent-inject: "true"
        vault.hashicorp.com/role: "typhon"
        vault.hashicorp.com/agent-inject-secret-aci-env.sh: "kv/data/atlas/climate/typhon/ac-infinity"
        vault.hashicorp.com/agent-inject-template-aci-env.sh: |
          {{- with secret "kv/data/atlas/climate/typhon/ac-infinity" -}}
          {{- if index .Data.data "ACI_EMAIL" }}
          export ACI_EMAIL="{{ index .Data.data "ACI_EMAIL" }}"
          {{- end }}
          {{- if index .Data.data "ACI_PASSWORD" }}
          export ACI_PASSWORD="{{ index .Data.data "ACI_PASSWORD" }}"
          {{- end }}
          {{- if index .Data.data "ACI_HOST" }}
          export ACI_HOST="{{ index .Data.data "ACI_HOST" }}"
          {{- end }}
          {{- if index .Data.data "TYPHON_MODE" }}
          export TYPHON_MODE="{{ index .Data.data "TYPHON_MODE" }}"
          {{- end }}
          {{- if index .Data.data "ENABLE_CONTROL_API" }}
          export ENABLE_CONTROL_API="{{ index .Data.data "ENABLE_CONTROL_API" }}"
          {{- end }}
          {{- if index .Data.data "TY_BLE_DEFAULT_MAC" }}
          export TY_BLE_DEFAULT_MAC="{{ index .Data.data "TY_BLE_DEFAULT_MAC" }}"
          {{- end }}
          {{- if index .Data.data "TY_BLE_ALLOWED_MACS" }}
          export TY_BLE_ALLOWED_MACS="{{ index .Data.data "TY_BLE_ALLOWED_MACS" }}"
          {{- end }}
          {{- if index .Data.data "TY_BLE_DEVICE_TYPE" }}
          export TY_BLE_DEVICE_TYPE="{{ index .Data.data "TY_BLE_DEVICE_TYPE" }}"
          {{- end }}
          {{- if index .Data.data "TY_BLE_SCAN_TIMEOUT_MS" }}
          export TY_BLE_SCAN_TIMEOUT_MS="{{ index .Data.data "TY_BLE_SCAN_TIMEOUT_MS" }}"
          {{- end }}
          {{- if index .Data.data "TY_BLE_PORT_BASE" }}
          export TY_BLE_PORT_BASE="{{ index .Data.data "TY_BLE_PORT_BASE" }}"
          {{- end }}
          {{- end -}}
        typhon.bstein.dev/restart-rev: "2"
    spec:
      serviceAccountName: typhon
      imagePullSecrets:
        - name: harbor-regcred
      nodeSelector:
        kubernetes.io/arch: amd64
        kubernetes.io/hostname: titan-22
      containers:
        - name: typhon
          image: registry.bstein.dev/bstein/typhon:main
          imagePullPolicy: Always
          command: ["/bin/sh", "-c"]
          args:
            - |
              set -eu
              if [ -f /vault/secrets/aci-env.sh ]; then
                . /vault/secrets/aci-env.sh
              fi
              : "${TYPHON_MODE:=cloud}"
              : "${ENABLE_CONTROL_API:=false}"
              exec node dist/index.js
          env:
            - name: TYPHON_MODE
              value: "cloud"
            - name: ENABLE_CONTROL_API
              value: "false"
            - name: POLL_INTERVAL_SECONDS
              value: "30"
            - name: REQUEST_TIMEOUT_MS
              value: "10000"
            - name: LISTEN_PORT
              value: "9108"
            - name: CONTROL_LISTEN_PORT
              value: "9110"
            - name: TY_BLE_DEVICE_TYPE
              value: "11"
            - name: TY_BLE_SCAN_TIMEOUT_MS
              value: "20000"
            - name: TY_BLE_PORT_BASE
              value: "1"
            - name: LOG_LEVEL
              value: "info"
          ports:
            - name: metrics
              containerPort: 9108
            - name: control
              containerPort: 9110
          volumeMounts:
            - name: tmp
              mountPath: /tmp
            - name: dbus-socket
              mountPath: /run/dbus/system_bus_socket
              readOnly: true
          livenessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 20
            periodSeconds: 20
            timeoutSeconds: 3
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 10
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          securityContext:
            runAsNonRoot: true
            runAsUser: 65532
            runAsGroup: 65532
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop: ["ALL"]
          resources:
            requests:
              cpu: 100m
              memory: 128Mi
            limits:
              cpu: 500m
              memory: 512Mi
      volumes:
        - name: tmp
          emptyDir: {}
        - name: dbus-socket
          hostPath:
            path: /run/dbus/system_bus_socket
            type: Socket
12 services/typhon/kustomization.yaml Normal file
@ -0,0 +1,12 @@
# services/typhon/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
namespace: climate
resources:
  - namespace.yaml
  - serviceaccount.yaml
  - secretproviderclass.yaml
  - vault-sync-deployment.yaml
  - deployment.yaml
  - service.yaml
  - networkpolicy.yaml
5 services/typhon/namespace.yaml Normal file
@ -0,0 +1,5 @@
# services/typhon/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: climate
53 services/typhon/networkpolicy.yaml Normal file
@ -0,0 +1,53 @@
# services/typhon/networkpolicy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: typhon
  namespace: climate
spec:
  podSelector:
    matchLabels:
      app: typhon
  policyTypes:
    - Ingress
    - Egress
  ingress:
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: monitoring
      ports:
        - protocol: TCP
          port: 9108
    - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: climate
      ports:
        - protocol: TCP
          port: 9110
  egress:
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: kube-system
      ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: vault
      ports:
        - protocol: TCP
          port: 8200
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - protocol: TCP
          port: 80
        - protocol: TCP
          port: 443
21 services/typhon/secretproviderclass.yaml Normal file
@ -0,0 +1,21 @@
# services/typhon/secretproviderclass.yaml
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: typhon-vault
  namespace: climate
spec:
  provider: vault
  parameters:
    vaultAddress: "http://vault.vault.svc.cluster.local:8200"
    roleName: "typhon"
    objects: |
      - objectName: "harbor-pull__dockerconfigjson"
        secretPath: "kv/data/atlas/shared/harbor-pull"
        secretKey: "dockerconfigjson"
  secretObjects:
    - secretName: harbor-regcred
      type: kubernetes.io/dockerconfigjson
      data:
        - objectName: harbor-pull__dockerconfigjson
          key: .dockerconfigjson
21 services/typhon/service.yaml Normal file
@ -0,0 +1,21 @@
# services/typhon/service.yaml
apiVersion: v1
kind: Service
metadata:
  name: typhon
  namespace: climate
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "9108"
    prometheus.io/path: "/metrics"
spec:
  type: ClusterIP
  selector:
    app: typhon
  ports:
    - name: metrics
      port: 9108
      targetPort: metrics
    - name: control
      port: 9110
      targetPort: control
6 services/typhon/serviceaccount.yaml Normal file
@ -0,0 +1,6 @@
# services/typhon/serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: typhon
  namespace: climate
34 services/typhon/vault-sync-deployment.yaml Normal file
@ -0,0 +1,34 @@
# services/typhon/vault-sync-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: typhon-vault-sync
  namespace: climate
spec:
  replicas: 1
  selector:
    matchLabels:
      app: typhon-vault-sync
  template:
    metadata:
      labels:
        app: typhon-vault-sync
    spec:
      serviceAccountName: typhon
      containers:
        - name: sync
          image: alpine:3.20
          command: ["/bin/sh", "-c"]
          args:
            - "sleep infinity"
          volumeMounts:
            - name: vault-secrets
              mountPath: /vault/secrets
              readOnly: true
      volumes:
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: typhon-vault
@ -1,55 +0,0 @@
# services/vault/k8s-auth-config-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vault-k8s-auth-config
  namespace: vault
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/15 * * * *"
  suspend: false
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        spec:
          serviceAccountName: vault-admin
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: configure-k8s-auth
              image: hashicorp/vault:1.17.6
              imagePullPolicy: IfNotPresent
              command:
                - sh
                - /scripts/vault_k8s_auth_configure.sh
              env:
                - name: VAULT_ADDR
                  value: http://10.43.57.249:8200
                - name: VAULT_K8S_ROLE
                  value: vault-admin
                - name: VAULT_K8S_TOKEN_REVIEWER_JWT_FILE
                  value: /var/run/secrets/vault-token-reviewer/token
                - name: VAULT_K8S_ROLE_TTL
                  value: 1h
              volumeMounts:
                - name: k8s-auth-config-script
                  mountPath: /scripts
                  readOnly: true
                - name: token-reviewer
                  mountPath: /var/run/secrets/vault-token-reviewer
                  readOnly: true
          volumes:
            - name: k8s-auth-config-script
              configMap:
                name: vault-k8s-auth-config-script
                defaultMode: 0555
            - name: token-reviewer
              secret:
                secretName: vault-admin-token-reviewer
@ -10,21 +10,9 @@ resources:
  - rbac.yaml
  - configmap.yaml
  - statefulset.yaml
  - k8s-auth-config-cronjob.yaml
  - oidc-config-cronjob.yaml
  - service.yaml
  - ingress.yaml
  - certificate.yaml
  - serverstransport.yaml
generatorOptions:
  disableNameSuffixHash: true
configMapGenerator:
  - name: vault-oidc-config-script
    files:
      - vault_oidc_configure.sh=scripts/vault_oidc_configure.sh
  - name: vault-k8s-auth-config-script
    files:
      - vault_k8s_auth_configure.sh=scripts/vault_k8s_auth_configure.sh
  - name: vault-entrypoint
    files:
      - vault-entrypoint.sh=scripts/vault-entrypoint.sh
@ -1,83 +0,0 @@
# services/vault/oidc-config-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: vault-oidc-config
  namespace: vault
  labels:
    atlas.bstein.dev/glue: "true"
spec:
  schedule: "*/15 * * * *"
  suspend: true
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      backoffLimit: 1
      template:
        metadata:
          annotations:
            vault.hashicorp.com/agent-inject: "true"
            vault.hashicorp.com/agent-pre-populate-only: "true"
            vault.hashicorp.com/role: "vault-admin"
            vault.hashicorp.com/agent-inject-secret-vault-oidc-env.sh: "kv/data/atlas/vault/vault-oidc-config"
            vault.hashicorp.com/agent-inject-template-vault-oidc-env.sh: |
              {{ with secret "kv/data/atlas/vault/vault-oidc-config" }}
              export VAULT_OIDC_DISCOVERY_URL="{{ .Data.data.discovery_url }}"
              export VAULT_OIDC_CLIENT_ID="{{ .Data.data.client_id }}"
              export VAULT_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}"
              export VAULT_OIDC_DEFAULT_ROLE="{{ .Data.data.default_role }}"
              export VAULT_OIDC_SCOPES="{{ .Data.data.scopes }}"
              export VAULT_OIDC_USER_CLAIM="{{ .Data.data.user_claim }}"
              export VAULT_OIDC_GROUPS_CLAIM="{{ .Data.data.groups_claim }}"
              export VAULT_OIDC_TOKEN_POLICIES="{{ .Data.data.token_policies }}"
              export VAULT_OIDC_ADMIN_GROUP="{{ .Data.data.admin_group }}"
              export VAULT_OIDC_ADMIN_POLICIES="{{ .Data.data.admin_policies }}"
              export VAULT_OIDC_DEV_GROUP="{{ .Data.data.dev_group }}"
              export VAULT_OIDC_DEV_POLICIES="{{ .Data.data.dev_policies }}"
              export VAULT_OIDC_USER_GROUP="{{ .Data.data.user_group }}"
              export VAULT_OIDC_USER_POLICIES="{{ .Data.data.user_policies }}"
              export VAULT_OIDC_REDIRECT_URIS="{{ .Data.data.redirect_uris }}"
              export VAULT_OIDC_BOUND_AUDIENCES="{{ .Data.data.bound_audiences }}"
              export VAULT_OIDC_BOUND_CLAIMS="{{ .Data.data.bound_claims }}"
              export VAULT_OIDC_BOUND_CLAIMS_TYPE="{{ .Data.data.bound_claims_type }}"
              {{ end }}
        spec:
          serviceAccountName: vault-admin
          restartPolicy: Never
          nodeSelector:
            kubernetes.io/arch: arm64
            node-role.kubernetes.io/worker: "true"
          containers:
            - name: configure-oidc
              image: hashicorp/vault:1.17.6
              imagePullPolicy: IfNotPresent
              command:
                - /entrypoint.sh
              args:
                - sh
                - /scripts/vault_oidc_configure.sh
              env:
                - name: VAULT_ADDR
                  value: http://10.43.57.249:8200
                - name: VAULT_K8S_ROLE
                  value: vault-admin
                - name: VAULT_ENV_FILE
                  value: /vault/secrets/vault-oidc-env.sh
              volumeMounts:
                - name: vault-entrypoint
                  mountPath: /entrypoint.sh
                  subPath: vault-entrypoint.sh
                - name: oidc-config-script
                  mountPath: /scripts
                  readOnly: true
          volumes:
            - name: vault-entrypoint
              configMap:
                name: vault-entrypoint
                defaultMode: 493
            - name: oidc-config-script
              configMap:
                name: vault-oidc-config-script
                defaultMode: 0555
@ -1,34 +0,0 @@
#!/bin/sh
set -eu

if [ -n "${VAULT_ENV_FILE:-}" ]; then
  if [ -f "${VAULT_ENV_FILE}" ]; then
    # shellcheck disable=SC1090
    . "${VAULT_ENV_FILE}"
  else
    echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2
    exit 1
  fi
fi

if [ -n "${VAULT_COPY_FILES:-}" ]; then
  old_ifs="$IFS"
  IFS=','
  for pair in ${VAULT_COPY_FILES}; do
    src="${pair%%:*}"
    dest="${pair#*:}"
    if [ -z "${src}" ] || [ -z "${dest}" ]; then
      echo "Vault copy entry malformed: ${pair}" >&2
      exit 1
    fi
    if [ ! -f "${src}" ]; then
      echo "Vault file not found: ${src}" >&2
      exit 1
    fi
    mkdir -p "$(dirname "${dest}")"
    cp "${src}" "${dest}"
  done
  IFS="$old_ifs"
fi

exec "$@"
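(For the VAULT_COPY_FILES branch above: the expected format is comma-separated src:dest pairs, e.g. VAULT_COPY_FILES="/vault/secrets/id_rsa:/root/.ssh/id_rsa", and each destination directory is created with mkdir -p before the copy. The example value is illustrative, not taken from the repo.)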
@ -1,259 +0,0 @@
#!/usr/bin/env sh
set -eu

log() { echo "[vault-k8s-auth] $*"; }

vault_cmd() {
  for attempt in 1 2 3 4 5 6; do
    set +e
    output="$(vault "$@" 2>&1)"
    status=$?
    set -e
    if [ "${status}" -eq 0 ]; then
      printf '%s' "${output}"
      return 0
    fi
    log "vault command failed; retrying (${attempt}/6)"
    sleep $((attempt * 2))
  done
  log "vault command failed; giving up"
  return 1
}

ensure_token() {
  if [ -n "${VAULT_TOKEN:-}" ]; then
    return
  fi
  role="${VAULT_K8S_ROLE:-vault}"
  jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
  if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then
    log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}"
    exit 1
  fi
  export VAULT_TOKEN
}

if ! status_json="$(vault_cmd status -format=json)"; then
  log "vault status failed; check VAULT_ADDR and VAULT_TOKEN"
  exit 1
fi

if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then
  log "vault not initialized; skipping"
  exit 0
fi

if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then
  log "vault sealed; skipping"
  exit 0
fi

ensure_token

k8s_host="https://${KUBERNETES_SERVICE_HOST}:443"
k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)"
k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
role_ttl="${VAULT_K8S_ROLE_TTL:-1h}"
token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}"

if [ -z "${token_reviewer_jwt}" ] && [ -n "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE:-}" ] && [ -r "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}" ]; then
  token_reviewer_jwt="$(cat "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}")"
fi
if [ -z "${token_reviewer_jwt}" ]; then
  token_reviewer_jwt="${k8s_token}"
fi

if ! vault_cmd auth list -format=json | grep -q '"kubernetes/"'; then
  log "enabling kubernetes auth"
  vault_cmd auth enable kubernetes
fi

log "configuring kubernetes auth"
vault_cmd write auth/kubernetes/config \
  token_reviewer_jwt="${token_reviewer_jwt}" \
  kubernetes_host="${k8s_host}" \
  kubernetes_ca_cert="${k8s_ca}"

write_raw_policy() {
  name="$1"
  body="$2"
  log "writing policy ${name}"
  printf '%s\n' "${body}" | vault_cmd policy write "${name}" -
}

write_policy_and_role() {
  role="$1"
  namespace="$2"
  service_accounts="$3"
  read_paths="$4"
  write_paths="$5"

  policy_body=""
  for path in ${read_paths}; do
    policy_body="${policy_body}
path \"kv/data/atlas/${path}\" {
  capabilities = [\"read\"]
}
path \"kv/metadata/atlas/${path}\" {
  capabilities = [\"list\"]
}
"
  done
  for path in ${write_paths}; do
    policy_body="${policy_body}
path \"kv/data/atlas/${path}\" {
  capabilities = [\"create\", \"update\", \"read\"]
}
path \"kv/metadata/atlas/${path}\" {
  capabilities = [\"list\"]
}
"
  done

  log "writing policy ${role}"
  printf '%s\n' "${policy_body}" | vault_cmd policy write "${role}" -

  log "writing role ${role}"
  vault_cmd write "auth/kubernetes/role/${role}" \
    bound_service_account_names="${service_accounts}" \
    bound_service_account_namespaces="${namespace}" \
    policies="${role}" \
    ttl="${role_ttl}"
}

vault_admin_policy='
path "sys/auth" {
  capabilities = ["read"]
}
path "sys/auth/*" {
  capabilities = ["create", "update", "delete", "sudo", "read"]
}
path "auth/kubernetes/*" {
  capabilities = ["create", "update", "read"]
}
path "auth/oidc/*" {
  capabilities = ["create", "update", "read"]
}
path "sys/policies/acl" {
  capabilities = ["list"]
}
path "sys/policies/acl/*" {
  capabilities = ["create", "update", "read"]
}
path "sys/internal/ui/mounts" {
  capabilities = ["read"]
}
path "sys/mounts" {
  capabilities = ["read"]
}
path "sys/mounts/auth/*" {
  capabilities = ["read", "update", "sudo"]
}
path "kv/data/atlas/vault/*" {
  capabilities = ["read"]
}
path "kv/metadata/atlas/vault/*" {
  capabilities = ["list"]
}
path "kv/data/*" {
  capabilities = ["create", "update", "read", "delete", "patch"]
}
path "kv/metadata" {
  capabilities = ["list"]
}
path "kv/metadata/*" {
  capabilities = ["read", "list", "delete"]
}
path "kv/data/atlas/shared/*" {
  capabilities = ["create", "update", "read", "patch"]
}
path "kv/metadata/atlas/shared/*" {
  capabilities = ["list"]
}
'

write_raw_policy "vault-admin" "${vault_admin_policy}"
dev_kv_policy='
path "kv/metadata" {
  capabilities = ["list"]
}
path "kv/metadata/atlas" {
  capabilities = ["list"]
}
path "kv/metadata/atlas/shared" {
  capabilities = ["list"]
}
path "kv/metadata/atlas/shared/*" {
  capabilities = ["list"]
}
path "kv/data/atlas/shared/*" {
  capabilities = ["read"]
}
'
write_raw_policy "dev-kv" "${dev_kv_policy}"
log "writing role vault-admin"
vault_cmd write "auth/kubernetes/role/vault-admin" \
  bound_service_account_names="vault-admin,ariadne" \
  bound_service_account_namespaces="vault,maintenance" \
  policies="vault-admin" \
  ttl="${role_ttl}"

write_policy_and_role "outline" "outline" "outline-vault" \
  "outline/* shared/postmark-relay" ""
write_policy_and_role "planka" "planka" "planka-vault" \
  "planka/* shared/postmark-relay" ""
write_policy_and_role "bstein-dev-home" "bstein-dev-home" "bstein-dev-home,bstein-dev-home-vault-sync" \
  "portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay mailu/mailu-initial-account-secret shared/harbor-pull" ""
write_policy_and_role "gitea" "gitea" "gitea-vault" \
  "gitea/*" ""
write_policy_and_role "vaultwarden" "vaultwarden" "vaultwarden-vault" \
  "vaultwarden/* mailu/mailu-initial-account-secret" ""
write_policy_and_role "sso" "sso" "sso-vault,sso-vault-sync,mas-secrets-ensure" \
  "sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin shared/portal-e2e-client shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "mailu-mailserver" "mailu-mailserver" "mailu-vault-sync" \
  "mailu/* shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "harbor" "harbor" "harbor-vault-sync" \
  "harbor/* shared/harbor-pull" ""
write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \
  "nextcloud/* shared/keycloak-admin shared/postmark-relay" ""
write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \
  "comms/* shared/chat-ai-keys-runtime shared/harbor-pull" ""
write_policy_and_role "jenkins" "jenkins" "jenkins,jenkins-vault-sync" \
  "jenkins/* shared/harbor-pull" ""
write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \
  "monitoring/* shared/postmark-relay shared/harbor-pull" ""
write_policy_and_role "logging" "logging" "logging-vault-sync" \
  "logging/* shared/harbor-pull" ""
write_policy_and_role "pegasus" "jellyfin" "pegasus-vault-sync" \
  "pegasus/* shared/harbor-pull" ""
write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \
  "crypto/* shared/harbor-pull" ""
write_policy_and_role "health" "health" "health-vault-sync" \
  "health/*" ""
write_policy_and_role "maintenance" "maintenance" "ariadne,maintenance-vault-sync,metis" \
  "maintenance/ariadne-db maintenance/metis-oidc maintenance/metis-ssh-keys maintenance/metis-runtime portal/atlas-portal-db portal/bstein-dev-home-keycloak-admin mailu/mailu-db-secret mailu/mailu-initial-account-secret nextcloud/nextcloud-db nextcloud/nextcloud-admin health/wger-admin finance/firefly-secrets comms/mas-admin-client-runtime comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin vault/vault-oidc-config shared/harbor-pull harbor/harbor-core" ""
write_policy_and_role "maintenance-metis-token-sync" "maintenance" "metis-token-sync" \
  "" \
  "maintenance/metis-runtime"
write_policy_and_role "finance" "finance" "finance-vault" \
  "finance/* shared/postmark-relay" ""
write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \
  "" \
  "finance/*"
write_policy_and_role "longhorn" "longhorn-system" "longhorn-vault,longhorn-vault-sync" \
  "longhorn/* shared/harbor-pull" ""
write_policy_and_role "postgres" "postgres" "postgres-vault" \
  "postgres/postgres-db" ""
write_policy_and_role "vault" "vault" "vault" \
  "vault/*" ""

write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \
  "shared/keycloak-admin maintenance/metis-ssh-keys" \
  "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc maintenance/metis-oidc maintenance/metis-ssh-keys"
write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \
  "" \
  "crypto/wallet-monero-temp-rpc-auth"
write_policy_and_role "comms-secrets" "comms" \
  "comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job" \
  "" \
  "comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon comms/atlasbot-credentials-runtime comms/synapse-db comms/synapse-admin comms/synapse-registration comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey"
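The policy templating in write_policy_and_role above is plain string expansion. A Python sketch of the same rendering, just to make the generated HCL easier to see (illustrative only; the repo does this in shell):

def render_policy(read_paths, write_paths):
    """Render the HCL the shell builds: read paths get read+list, write paths get create/update/read+list."""
    blocks = []
    for path in read_paths:
        blocks.append(f'path "kv/data/atlas/{path}" {{\n  capabilities = ["read"]\n}}')
        blocks.append(f'path "kv/metadata/atlas/{path}" {{\n  capabilities = ["list"]\n}}')
    for path in write_paths:
        blocks.append(f'path "kv/data/atlas/{path}" {{\n  capabilities = ["create", "update", "read"]\n}}')
        blocks.append(f'path "kv/metadata/atlas/{path}" {{\n  capabilities = ["list"]\n}}')
    return "\n".join(blocks)

# render_policy(["gitea/*"], []) reproduces the policy body the script writes for the gitea role.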
@ -1,166 +0,0 @@
#!/usr/bin/env sh
set -eu

log() { echo "[vault-oidc] $*"; }

vault_cmd() {
  for attempt in 1 2 3 4 5 6; do
    set +e
    output="$(vault "$@" 2>&1)"
    status=$?
    set -e
    if [ "${status}" -eq 0 ]; then
      printf '%s' "${output}"
      return 0
    fi
    log "vault command failed; retrying (${attempt}/6)"
    sleep $((attempt * 2))
  done
  log "vault command failed; giving up"
  return 1
}

ensure_token() {
  if [ -n "${VAULT_TOKEN:-}" ]; then
    return
  fi
  role="${VAULT_K8S_ROLE:-vault}"
  jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)"
  if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then
    log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}"
    exit 1
  fi
  export VAULT_TOKEN
}

if ! status_json="$(vault_cmd status -format=json)"; then
  log "vault status failed; check VAULT_ADDR and VAULT_TOKEN"
  exit 1
fi

if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then
  log "vault not initialized; skipping"
  exit 0
fi

if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then
  log "vault sealed; skipping"
  exit 0
fi

ensure_token

: "${VAULT_OIDC_DISCOVERY_URL:?set VAULT_OIDC_DISCOVERY_URL}"
: "${VAULT_OIDC_CLIENT_ID:?set VAULT_OIDC_CLIENT_ID}"
: "${VAULT_OIDC_CLIENT_SECRET:?set VAULT_OIDC_CLIENT_SECRET}"

default_role="${VAULT_OIDC_DEFAULT_ROLE:-admin}"
scopes="${VAULT_OIDC_SCOPES:-openid profile email groups}"
user_claim="${VAULT_OIDC_USER_CLAIM:-preferred_username}"
groups_claim="${VAULT_OIDC_GROUPS_CLAIM:-groups}"
redirect_uris="${VAULT_OIDC_REDIRECT_URIS:-https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback}"
bound_audiences="${VAULT_OIDC_BOUND_AUDIENCES:-${VAULT_OIDC_CLIENT_ID}}"
bound_claims_type="${VAULT_OIDC_BOUND_CLAIMS_TYPE:-string}"
bound_claims_type="$(printf '%s' "${bound_claims_type}" | tr -d '[:space:]')"
if [ -z "${bound_claims_type}" ] || [ "${bound_claims_type}" = "<novalue>" ]; then
  bound_claims_type="string"
fi

admin_group="${VAULT_OIDC_ADMIN_GROUP:-admin}"
admin_policies="${VAULT_OIDC_ADMIN_POLICIES:-default,vault-admin}"
dev_group="${VAULT_OIDC_DEV_GROUP:-dev}"
dev_policies="${VAULT_OIDC_DEV_POLICIES:-default,dev-kv}"
user_group="${VAULT_OIDC_USER_GROUP:-${dev_group}}"
user_policies="${VAULT_OIDC_USER_POLICIES:-${VAULT_OIDC_TOKEN_POLICIES:-${dev_policies}}}"

if ! vault_cmd auth list -format=json | grep -q '"oidc/"'; then
  log "enabling oidc auth method"
  vault_cmd auth enable oidc
fi

log "configuring oidc auth"
vault_cmd write auth/oidc/config \
  oidc_discovery_url="${VAULT_OIDC_DISCOVERY_URL}" \
  oidc_client_id="${VAULT_OIDC_CLIENT_ID}" \
  oidc_client_secret="${VAULT_OIDC_CLIENT_SECRET}" \
  default_role="${default_role}"

vault_cmd auth tune -listing-visibility=unauth oidc >/dev/null

build_bound_claims() {
  claim="$1"
  groups="$2"
  json="{\"${claim}\":["
  first=1
  old_ifs=$IFS
  IFS=,
  for item in $groups; do
    item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
    if [ -z "${item}" ]; then
      continue
    fi
    if [ "${first}" -eq 0 ]; then
      json="${json},"
    fi
    json="${json}\"${item}\""
    first=0
  done
  IFS=$old_ifs
  json="${json}]}"
  printf '%s' "${json}"
}

build_json_array() {
  items="$1"
  json="["
  first=1
  old_ifs=$IFS
  IFS=,
  for item in $items; do
    item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')"
    if [ -z "${item}" ]; then
      continue
    fi
    if [ "${first}" -eq 0 ]; then
      json="${json},"
    fi
    json="${json}\"${item}\""
    first=0
  done
  IFS=$old_ifs
  json="${json}]"
  printf '%s' "${json}"
}

configure_role() {
  role_name="$1"
  role_groups="$2"
  role_policies="$3"
  if [ -z "${role_name}" ] || [ -z "${role_groups}" ] || [ -z "${role_policies}" ]; then
    log "skipping role ${role_name} (missing groups or policies)"
    return
  fi
  claims="$(build_bound_claims "${groups_claim}" "${role_groups}")"
  scopes_csv="$(printf '%s' "${scopes}" | tr ' ' ',' | tr -s ',' | sed 's/^,//;s/,$//')"
  redirect_json="$(build_json_array "${redirect_uris}")"
  payload_file="$(mktemp)"
  cat > "${payload_file}" <<EOF
{
  "user_claim": "${user_claim}",
  "oidc_scopes": "${scopes_csv}",
  "token_policies": "${role_policies}",
  "bound_audiences": "${bound_audiences}",
  "bound_claims": ${claims},
  "bound_claims_type": "${bound_claims_type}",
  "groups_claim": "${groups_claim}",
  "allowed_redirect_uris": ${redirect_json}
}
EOF
  log "configuring oidc role ${role_name}"
  vault_cmd write "auth/oidc/role/${role_name}" @"${payload_file}"
  rm -f "${payload_file}"
}

configure_role "admin" "${admin_group}" "${admin_policies}"
configure_role "dev" "${dev_group}" "${dev_policies}"
configure_role "user" "${user_group}" "${user_policies}"
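(For example, build_bound_claims "groups" "admin, dev" emits {"groups":["admin","dev"]}, which Vault then matches against the token's groups claim when evaluating the role.)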
1 testing/__init__.py Normal file
@ -0,0 +1 @@
"""Top-level testing contract and quality-gate tooling for titan-iac."""
164 testing/quality_contract.json Normal file
@ -0,0 +1,164 @@
{
  "required_docs": [
    {
      "path": "README.md",
      "description": "Top-level repository handbook."
    },
    {
      "path": "AGENTS.md",
      "description": "Shared repository operating instructions."
    },
    {
      "path": "Jenkinsfile",
      "description": "Top-level Jenkins mirror for multibranch discovery."
    },
    {
      "path": "ci/Jenkinsfile.titan-iac",
      "description": "Canonical titan-iac Jenkins pipeline definition."
    }
  ],
  "managed_modules": [
    "ci/scripts/publish_test_metrics.py",
    "services/mailu/scripts/mailu_sync.py",
    "testing/__init__.py",
    "testing/quality_contract.py",
    "testing/quality_docs.py",
    "testing/quality_hygiene.py",
    "testing/quality_coverage.py",
    "testing/quality_gate.py"
  ],
  "lint_paths": [
    "ci/scripts/publish_test_metrics.py",
    "ci/tests/glue",
    "scripts/tests",
    "services/comms/scripts/tests",
    "services/mailu/scripts/mailu_sync.py",
    "testing"
  ],
  "pytest_suites": {
    "unit": {
      "description": "Fast unit and contract tests for repo automation.",
      "paths": [
        "scripts/tests",
        "services/comms/scripts/tests",
        "testing/tests"
      ],
      "junit": "build/junit-unit.xml",
      "coverage_sources": [
        "ci/scripts",
        "services/mailu/scripts",
        "testing"
      ],
      "coverage_xml": "build/coverage-unit.xml"
    },
    "glue": {
      "description": "Cluster-live glue checks that validate CronJobs and exported metrics.",
      "paths": [
        "ci/tests/glue"
      ],
      "junit": "build/junit-glue.xml"
    }
  },
  "profiles": {
    "local": [
      "docs",
      "smell",
      "hygiene",
      "unit",
      "coverage"
    ],
    "jenkins": [
      "docs",
      "smell",
      "hygiene",
      "unit",
      "coverage",
      "glue"
    ]
  },
  "manual_scripts": [
    {
      "path": "scripts/test_atlas_user_cleanup.py",
      "description": "Manual cleanup validation for Atlas user lifecycle automation."
    },
    {
      "path": "scripts/test_user_cleanup.py",
      "description": "Manual cleanup validation for shared user lifecycle automation."
    },
    {
      "path": "scripts/test_vaultwarden_user_cleanup.py",
      "description": "Manual cleanup validation for Vaultwarden user lifecycle automation."
    },
    {
      "path": "services/bstein-dev-home/scripts/test_portal_onboarding_flow.py",
      "description": "Portal onboarding end-to-end flow validation with mail delivery checks."
    },
    {
      "path": "services/keycloak/scripts/tests/test_keycloak_execute_actions_email.py",
      "description": "Standalone Keycloak SMTP execute-actions-email validation script."
    },
    {
      "path": "services/keycloak/scripts/tests/test_portal_token_exchange.py",
      "description": "Standalone Keycloak token-exchange validation script."
    }
  ],
  "hygiene": {
    "max_lines": 500,
    "line_limit_globs": [
      "testing/**/*.py",
      "ci/scripts/*.py",
      "ci/tests/**/*.py",
      "scripts/tests/**/*.py",
      "services/*/scripts/tests/**/*.py",
      "services/mailu/scripts/mailu_sync.py"
    ],
    "naming_rules": [
      {
        "glob": "testing/*.py",
        "pattern": "^(?:__init__|quality_[a-z0-9_]+)\\.py$",
        "description": "Top-level testing helpers use quality_* module names."
      },
      {
        "glob": "testing/tests/*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "Top-level pytest files use test_*.py names."
      },
      {
        "glob": "ci/tests/**/*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "CI pytest files use test_*.py names."
      },
      {
        "glob": "scripts/tests/**/*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "Script pytest files use test_*.py names."
      },
      {
        "glob": "scripts/test_*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "Standalone script tests use test_*.py names."
      },
      {
        "glob": "services/*/scripts/tests/**/*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "Service pytest files use test_*.py names."
      },
      {
        "glob": "services/*/scripts/test_*.py",
        "pattern": "^test_[a-z0-9_]+\\.py$",
        "description": "Standalone service test scripts use test_*.py names."
      }
    ]
  },
  "coverage": {
    "minimum_percent": 95.0,
    "tracked_files": [
      "ci/scripts/publish_test_metrics.py",
      "testing/quality_contract.py",
      "testing/quality_docs.py",
      "testing/quality_hygiene.py",
      "testing/quality_coverage.py",
      "testing/quality_gate.py"
    ]
  }
}
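A quick local sanity check for contract edits, using only the standard library (a sketch; the repo's real checks live in the testing/quality_* modules listed above):

import json
from pathlib import Path

contract = json.loads(Path("testing/quality_contract.json").read_text(encoding="utf-8"))
missing = [doc["path"] for doc in contract["required_docs"] if not Path(doc["path"]).exists()]
if missing:
    raise SystemExit(f"required docs missing: {missing}")
print("profiles:", ", ".join(contract["profiles"]))  # -> "local, jenkins"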
17 testing/quality_contract.py Normal file
@ -0,0 +1,17 @@
"""Helpers for loading the repository testing contract."""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any


CONTRACT_PATH = Path(__file__).with_name("quality_contract.json")


def load_contract(contract_path: Path | None = None) -> dict[str, Any]:
    """Return the parsed testing contract."""
    path = contract_path or CONTRACT_PATH
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
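Typical consumption from the gate code might look like this (a sketch; the actual call sites are in testing/quality_gate.py, which this diff view does not show):

from testing.quality_contract import load_contract

contract = load_contract()
checks = contract["profiles"]["jenkins"]  # ["docs", "smell", "hygiene", "unit", "coverage", "glue"]
minimum = contract["coverage"]["minimum_percent"]  # 95.0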
Some files were not shown because too many files have changed in this diff.