diff --git a/.gitignore b/.gitignore index 8e09aa9..8d0ab1e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ *.py[cod] .pytest_cache .venv +tmp/ diff --git a/ci/Jenkinsfile.titan-iac b/ci/Jenkinsfile.titan-iac new file mode 100644 index 0000000..3b13eb0 --- /dev/null +++ b/ci/Jenkinsfile.titan-iac @@ -0,0 +1,53 @@ +pipeline { + agent { + kubernetes { + defaultContainer 'python' + yaml """ +apiVersion: v1 +kind: Pod +spec: + containers: + - name: python + image: python:3.12-slim + command: + - cat + tty: true +""" + } + } + environment { + PIP_DISABLE_PIP_VERSION_CHECK = '1' + PYTHONUNBUFFERED = '1' + DEPLOY_BRANCH = 'deploy' + } + stages { + stage('Checkout') { + steps { + checkout scm + } + } + stage('Install deps') { + steps { + sh 'pip install --no-cache-dir -r ci/requirements.txt' + } + } + stage('Glue tests') { + steps { + sh 'pytest -q ci/tests/glue' + } + } + stage('Promote') { + steps { + withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) { + sh ''' + set +x + git config user.email "jenkins@bstein.dev" + git config user.name "jenkins" + git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git + git push origin HEAD:${DEPLOY_BRANCH} + ''' + } + } + } + } +} diff --git a/ci/requirements.txt b/ci/requirements.txt new file mode 100644 index 0000000..eaa21aa --- /dev/null +++ b/ci/requirements.txt @@ -0,0 +1,4 @@ +pytest==8.3.4 +kubernetes==30.1.0 +PyYAML==6.0.2 +requests==2.32.3 diff --git a/ci/tests/glue/config.yaml b/ci/tests/glue/config.yaml new file mode 100644 index 0000000..8adf4ca --- /dev/null +++ b/ci/tests/glue/config.yaml @@ -0,0 +1,7 @@ +max_success_age_hours: 48 +allow_suspended: + - comms/othrys-room-reset + - comms/pin-othrys-invite + - comms/seed-othrys-room + - finance/firefly-user-sync + - health/wger-user-sync diff --git a/ci/tests/glue/test_glue_cronjobs.py b/ci/tests/glue/test_glue_cronjobs.py new file mode 
100644 index 0000000..ec6b620 --- /dev/null +++ b/ci/tests/glue/test_glue_cronjobs.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path + +import yaml +from kubernetes import client, config + + +CONFIG_PATH = Path(__file__).with_name("config.yaml") + + +def _load_config() -> dict: + with CONFIG_PATH.open("r", encoding="utf-8") as handle: + return yaml.safe_load(handle) or {} + + +def _load_kube(): + try: + config.load_incluster_config() + except config.ConfigException: + config.load_kube_config() + + +def test_glue_cronjobs_recent_success(): + cfg = _load_config() + max_age_hours = int(cfg.get("max_success_age_hours", 48)) + allow_suspended = set(cfg.get("allow_suspended", [])) + + _load_kube() + batch = client.BatchV1Api() + cronjobs = batch.list_cron_job_for_all_namespaces(label_selector="atlas.bstein.dev/glue=true").items + + assert cronjobs, "No glue cronjobs found with atlas.bstein.dev/glue=true" + + now = datetime.now(timezone.utc) + for cronjob in cronjobs: + name = f"{cronjob.metadata.namespace}/{cronjob.metadata.name}" + if cronjob.spec.suspend: + assert name in allow_suspended, f"{name} is suspended but not in allow_suspended" + continue + + last_success = cronjob.status.last_successful_time + assert last_success is not None, f"{name} has no lastSuccessfulTime" + age_hours = (now - last_success).total_seconds() / 3600 + assert age_hours <= max_age_hours, f"{name} last success {age_hours:.1f}h ago" diff --git a/ci/tests/glue/test_glue_metrics.py b/ci/tests/glue/test_glue_metrics.py new file mode 100644 index 0000000..16b01c7 --- /dev/null +++ b/ci/tests/glue/test_glue_metrics.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +import os + +import requests + + +VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/") + + +def _query(promql: str) -> list[dict]: + response = requests.get(f"{VM_URL}/api/v1/query", params={"query": promql}, 
timeout=10) + response.raise_for_status() + payload = response.json() + return payload.get("data", {}).get("result", []) + + +def test_glue_metrics_present(): + series = _query('kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}') + assert series, "No glue cronjob label series found" + + +def test_glue_metrics_success_join(): + query = ( + "kube_cronjob_status_last_successful_time " + 'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}' + ) + series = _query(query) + assert series, "No glue cronjob last success series found" diff --git a/clusters/atlas/applications/kustomization.yaml b/clusters/atlas/applications/kustomization.yaml deleted file mode 100644 index ed6d795..0000000 --- a/clusters/atlas/applications/kustomization.yaml +++ /dev/null @@ -1,13 +0,0 @@ -# clusters/atlas/applications/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - ../../services/crypto - - ../../services/gitea - - ../../services/jellyfin - - ../../services/comms - - ../../services/monitoring - - ../../services/logging - - ../../services/pegasus - - ../../services/vault - - ../../services/bstein-dev-home diff --git a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml index e198db4..88dda40 100644 --- a/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml +++ b/clusters/atlas/flux-system/applications/bstein-dev-home/image-automation.yaml @@ -13,14 +13,14 @@ spec: git: checkout: ref: - branch: main + branch: feature/vault-consumption commit: author: email: ops@bstein.dev name: flux-bot messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" push: - branch: main + branch: feature/vault-consumption update: strategy: Setters path: services/bstein-dev-home diff --git a/clusters/atlas/flux-system/applications/comms/kustomization.yaml 
b/clusters/atlas/flux-system/applications/comms/kustomization.yaml index 0fb664a..cde929d 100644 --- a/clusters/atlas/flux-system/applications/comms/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/comms/kustomization.yaml @@ -1,4 +1,4 @@ -# clusters/atlas/flux-system/applications/communication/kustomization.yaml +# clusters/atlas/flux-system/applications/comms/kustomization.yaml apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization metadata: diff --git a/clusters/atlas/flux-system/applications/finance/kustomization.yaml b/clusters/atlas/flux-system/applications/finance/kustomization.yaml new file mode 100644 index 0000000..370e2d1 --- /dev/null +++ b/clusters/atlas/flux-system/applications/finance/kustomization.yaml @@ -0,0 +1,24 @@ +# clusters/atlas/flux-system/applications/finance/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: finance + namespace: flux-system +spec: + interval: 10m + path: ./services/finance + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: finance + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: actual-budget + namespace: finance + - apiVersion: apps/v1 + kind: Deployment + name: firefly + namespace: finance + wait: false diff --git a/clusters/atlas/flux-system/applications/health/kustomization.yaml b/clusters/atlas/flux-system/applications/health/kustomization.yaml new file mode 100644 index 0000000..f4a3d61 --- /dev/null +++ b/clusters/atlas/flux-system/applications/health/kustomization.yaml @@ -0,0 +1,25 @@ +# clusters/atlas/flux-system/applications/health/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: health + namespace: flux-system +spec: + interval: 10m + path: ./services/health + prune: true + sourceRef: + kind: GitRepository + name: flux-system + targetNamespace: health + dependsOn: + - name: keycloak + - name: postgres + - name: traefik + - name: 
vault + healthChecks: + - apiVersion: apps/v1 + kind: Deployment + name: wger + namespace: health + wait: false diff --git a/clusters/atlas/flux-system/applications/kustomization.yaml b/clusters/atlas/flux-system/applications/kustomization.yaml index d48cf9e..417a3ec 100644 --- a/clusters/atlas/flux-system/applications/kustomization.yaml +++ b/clusters/atlas/flux-system/applications/kustomization.yaml @@ -16,6 +16,7 @@ resources: - harbor/image-automation.yaml - jellyfin/kustomization.yaml - xmr-miner/kustomization.yaml + - wallet-monero-temp/kustomization.yaml - sui-metrics/kustomization.yaml - openldap/kustomization.yaml - keycloak/kustomization.yaml @@ -27,3 +28,5 @@ resources: - nextcloud-mail-sync/kustomization.yaml - outline/kustomization.yaml - planka/kustomization.yaml + - finance/kustomization.yaml + - health/kustomization.yaml diff --git a/clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml b/clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml new file mode 100644 index 0000000..700e17f --- /dev/null +++ b/clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml @@ -0,0 +1,19 @@ +# clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: wallet-monero-temp + namespace: flux-system +spec: + interval: 10m + path: ./services/crypto/wallet-monero-temp + targetNamespace: crypto + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + dependsOn: + - name: crypto + - name: xmr-miner + wait: true diff --git a/clusters/atlas/flux-system/gotk-components.yaml b/clusters/atlas/flux-system/gotk-components.yaml index 6c475ff..7d56afa 100644 --- a/clusters/atlas/flux-system/gotk-components.yaml +++ b/clusters/atlas/flux-system/gotk-components.yaml @@ -1,3 +1,4 @@ +# clusters/atlas/flux-system/gotk-components.yaml --- # This manifest was generated by flux. 
DO NOT EDIT. # Flux Version: v2.7.5 diff --git a/clusters/atlas/flux-system/gotk-sync.yaml b/clusters/atlas/flux-system/gotk-sync.yaml index 473ab99..400c76d 100644 --- a/clusters/atlas/flux-system/gotk-sync.yaml +++ b/clusters/atlas/flux-system/gotk-sync.yaml @@ -1,3 +1,4 @@ +# clusters/atlas/flux-system/gotk-sync.yaml # This manifest was generated by flux. DO NOT EDIT. --- apiVersion: source.toolkit.fluxcd.io/v1 @@ -8,7 +9,7 @@ metadata: spec: interval: 1m0s ref: - branch: main + branch: deploy secretRef: name: flux-system-gitea url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git diff --git a/clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml b/clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml new file mode 100644 index 0000000..230e22a --- /dev/null +++ b/clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml @@ -0,0 +1,17 @@ +# clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: cert-manager-cleanup + namespace: flux-system +spec: + interval: 30m + path: ./infrastructure/cert-manager/cleanup + prune: true + force: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: cert-manager + wait: true diff --git a/clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml b/clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml new file mode 100644 index 0000000..63469af --- /dev/null +++ b/clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml @@ -0,0 +1,19 @@ +# clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: cert-manager + namespace: flux-system +spec: + interval: 30m + path: ./infrastructure/cert-manager + prune: true + force: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: 
flux-system + targetNamespace: cert-manager + dependsOn: + - name: helm + wait: true diff --git a/clusters/atlas/flux-system/platform/kustomization.yaml b/clusters/atlas/flux-system/platform/kustomization.yaml index 6f88db7..b689cc0 100644 --- a/clusters/atlas/flux-system/platform/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/kustomization.yaml @@ -4,12 +4,16 @@ kind: Kustomization resources: - core/kustomization.yaml - helm/kustomization.yaml + - cert-manager/kustomization.yaml - metallb/kustomization.yaml - traefik/kustomization.yaml - gitops-ui/kustomization.yaml - monitoring/kustomization.yaml - logging/kustomization.yaml - maintenance/kustomization.yaml + - longhorn-adopt/kustomization.yaml + - longhorn/kustomization.yaml - longhorn-ui/kustomization.yaml - postgres/kustomization.yaml - ../platform/vault-csi/kustomization.yaml + - ../platform/vault-injector/kustomization.yaml diff --git a/clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml b/clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml new file mode 100644 index 0000000..f568a5e --- /dev/null +++ b/clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml @@ -0,0 +1,17 @@ +# clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: longhorn-adopt + namespace: flux-system +spec: + interval: 30m + path: ./infrastructure/longhorn/adopt + prune: true + force: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: longhorn-system + wait: true diff --git a/clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml b/clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml index fc6bd1f..4517728 100644 --- a/clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml +++ b/clusters/atlas/flux-system/platform/longhorn-ui/kustomization.yaml @@ -15,4 +15,5 @@ spec: namespace: 
flux-system dependsOn: - name: core + - name: longhorn wait: true diff --git a/clusters/atlas/flux-system/platform/longhorn/kustomization.yaml b/clusters/atlas/flux-system/platform/longhorn/kustomization.yaml new file mode 100644 index 0000000..1a51254 --- /dev/null +++ b/clusters/atlas/flux-system/platform/longhorn/kustomization.yaml @@ -0,0 +1,20 @@ +# clusters/atlas/flux-system/platform/longhorn/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: longhorn + namespace: flux-system +spec: + interval: 30m + path: ./infrastructure/longhorn/core + prune: true + force: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + targetNamespace: longhorn-system + dependsOn: + - name: helm + - name: longhorn-adopt + wait: false diff --git a/clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml b/clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml new file mode 100644 index 0000000..d7d740d --- /dev/null +++ b/clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml @@ -0,0 +1,16 @@ +# clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: vault-injector + namespace: flux-system +spec: + interval: 30m + path: ./infrastructure/vault-injector + targetNamespace: vault + prune: true + sourceRef: + kind: GitRepository + name: flux-system + namespace: flux-system + wait: true diff --git a/clusters/atlas/platform/kustomization.yaml b/clusters/atlas/platform/kustomization.yaml deleted file mode 100644 index 43fa993..0000000 --- a/clusters/atlas/platform/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# clusters/atlas/platform/kustomization.yaml -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - ../../../infrastructure/modules/base - - ../../../infrastructure/modules/profiles/atlas-ha - - 
../../../infrastructure/sources/cert-manager/letsencrypt.yaml - - ../../../infrastructure/metallb diff --git a/dockerfiles/Dockerfile.comms-guest-tools b/dockerfiles/Dockerfile.comms-guest-tools new file mode 100644 index 0000000..2a18016 --- /dev/null +++ b/dockerfiles/Dockerfile.comms-guest-tools @@ -0,0 +1,5 @@ +FROM python:3.11-slim + +ENV PIP_DISABLE_PIP_VERSION_CHECK=1 + +RUN pip install --no-cache-dir requests psycopg2-binary diff --git a/dockerfiles/Dockerfile.harbor-core-vault b/dockerfiles/Dockerfile.harbor-core-vault new file mode 100644 index 0000000..b313647 --- /dev/null +++ b/dockerfiles/Dockerfile.harbor-core-vault @@ -0,0 +1,9 @@ +FROM registry.bstein.dev/infra/harbor-core:v2.14.1-arm64 + +USER root +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh +USER harbor + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/harbor/entrypoint.sh"] diff --git a/dockerfiles/Dockerfile.harbor-jobservice-vault b/dockerfiles/Dockerfile.harbor-jobservice-vault new file mode 100644 index 0000000..28a82d5 --- /dev/null +++ b/dockerfiles/Dockerfile.harbor-jobservice-vault @@ -0,0 +1,9 @@ +FROM registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64 + +USER root +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh +USER harbor + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/harbor/entrypoint.sh"] diff --git a/dockerfiles/Dockerfile.harbor-registry-vault b/dockerfiles/Dockerfile.harbor-registry-vault new file mode 100644 index 0000000..608b6e5 --- /dev/null +++ b/dockerfiles/Dockerfile.harbor-registry-vault @@ -0,0 +1,9 @@ +FROM registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64 + +USER root +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh +USER harbor + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/home/harbor/entrypoint.sh"] diff --git a/dockerfiles/Dockerfile.harbor-registryctl-vault b/dockerfiles/Dockerfile.harbor-registryctl-vault new file mode 100644 index 0000000..b9cf061 --- 
/dev/null +++ b/dockerfiles/Dockerfile.harbor-registryctl-vault @@ -0,0 +1,9 @@ +FROM registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64 + +USER root +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh +USER harbor + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/home/harbor/start.sh"] diff --git a/dockerfiles/Dockerfile.livekit-token-vault b/dockerfiles/Dockerfile.livekit-token-vault new file mode 100644 index 0000000..cbe49b1 --- /dev/null +++ b/dockerfiles/Dockerfile.livekit-token-vault @@ -0,0 +1,10 @@ +FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base + +FROM alpine:3.20 +RUN apk add --no-cache ca-certificates +COPY --from=base /lk-jwt-service /lk-jwt-service +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/lk-jwt-service"] diff --git a/dockerfiles/Dockerfile.oauth2-proxy-vault b/dockerfiles/Dockerfile.oauth2-proxy-vault new file mode 100644 index 0000000..71ce2a6 --- /dev/null +++ b/dockerfiles/Dockerfile.oauth2-proxy-vault @@ -0,0 +1,10 @@ +FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base + +FROM alpine:3.20 +RUN apk add --no-cache ca-certificates +COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/bin/oauth2-proxy"] diff --git a/dockerfiles/Dockerfile.pegasus-vault b/dockerfiles/Dockerfile.pegasus-vault new file mode 100644 index 0000000..ac49095 --- /dev/null +++ b/dockerfiles/Dockerfile.pegasus-vault @@ -0,0 +1,10 @@ +FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base + +FROM alpine:3.20 +RUN apk add --no-cache ca-certificates +COPY --from=base /pegasus /pegasus +COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh +RUN chmod 0755 /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +CMD ["/pegasus"] diff --git a/dockerfiles/vault-entrypoint.sh b/dockerfiles/vault-entrypoint.sh new file mode 100644 index 
0000000..fa3b791 --- /dev/null +++ b/dockerfiles/vault-entrypoint.sh @@ -0,0 +1,34 @@ +#!/bin/sh +set -eu + +if [ -n "${VAULT_ENV_FILE:-}" ]; then + if [ -f "${VAULT_ENV_FILE}" ]; then + # shellcheck disable=SC1090 + . "${VAULT_ENV_FILE}" + else + echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml b/infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml new file mode 100644 index 0000000..c1de1fc --- /dev/null +++ b/infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml @@ -0,0 +1,40 @@ +# infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: cert-manager-cleanup-2 + namespace: cert-manager +spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: cert-manager-cleanup + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + containers: + - name: cleanup + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/cert_manager_cleanup.sh"] + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + 
configMap: + name: cert-manager-cleanup-script + defaultMode: 0555 diff --git a/infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml b/infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml new file mode 100644 index 0000000..ee275c5 --- /dev/null +++ b/infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml @@ -0,0 +1,58 @@ +# infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cert-manager-cleanup + namespace: cert-manager +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: cert-manager-cleanup +rules: + - apiGroups: [""] + resources: + - pods + - services + - endpoints + - configmaps + - secrets + - serviceaccounts + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["apps"] + resources: + - deployments + - daemonsets + - statefulsets + - replicasets + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["batch"] + resources: + - jobs + - cronjobs + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["rbac.authorization.k8s.io"] + resources: + - roles + - rolebindings + - clusterroles + - clusterrolebindings + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + - mutatingwebhookconfigurations + verbs: ["get", "list", "watch", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: cert-manager-cleanup +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cert-manager-cleanup +subjects: + - kind: ServiceAccount + name: cert-manager-cleanup + namespace: cert-manager diff --git a/infrastructure/cert-manager/cleanup/kustomization.yaml b/infrastructure/cert-manager/cleanup/kustomization.yaml new file mode 100644 index 0000000..8aee369 --- /dev/null +++ b/infrastructure/cert-manager/cleanup/kustomization.yaml @@ -0,0 +1,15 @@ +# 
infrastructure/cert-manager/cleanup/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - cert-manager-cleanup-rbac.yaml + - cert-manager-cleanup-job.yaml + +configMapGenerator: + - name: cert-manager-cleanup-script + namespace: cert-manager + files: + - cert_manager_cleanup.sh=scripts/cert_manager_cleanup.sh + options: + disableNameSuffixHash: true diff --git a/infrastructure/cert-manager/cleanup/namespace.yaml b/infrastructure/cert-manager/cleanup/namespace.yaml new file mode 100644 index 0000000..762cc25 --- /dev/null +++ b/infrastructure/cert-manager/cleanup/namespace.yaml @@ -0,0 +1,5 @@ +# infrastructure/cert-manager/cleanup/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: cert-manager diff --git a/infrastructure/cert-manager/cleanup/scripts/cert_manager_cleanup.sh b/infrastructure/cert-manager/cleanup/scripts/cert_manager_cleanup.sh new file mode 100644 index 0000000..9bdfc33 --- /dev/null +++ b/infrastructure/cert-manager/cleanup/scripts/cert_manager_cleanup.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +set -euo pipefail + +namespace="cert-manager" +selectors=( + "app.kubernetes.io/name=cert-manager" + "app.kubernetes.io/instance=cert-manager" + "app.kubernetes.io/instance=certmanager-prod" +) + +delete_namespaced() { + local selector="$1" + kubectl -n "${namespace}" delete deployment,daemonset,statefulset,replicaset \ + --selector "${selector}" --ignore-not-found --wait=false + kubectl -n "${namespace}" delete pod,service,endpoints,serviceaccount,configmap,secret \ + --selector "${selector}" --ignore-not-found --wait=false + kubectl -n "${namespace}" delete role,rolebinding \ + --selector "${selector}" --ignore-not-found --wait=false + kubectl -n "${namespace}" delete job,cronjob \ + --selector "${selector}" --ignore-not-found --wait=false +} + +delete_cluster_scoped() { + local selector="$1" + kubectl delete clusterrole,clusterrolebinding \ + --selector "${selector}" 
--ignore-not-found --wait=false + kubectl delete mutatingwebhookconfiguration,validatingwebhookconfiguration \ + --selector "${selector}" --ignore-not-found --wait=false +} + +for selector in "${selectors[@]}"; do + delete_namespaced "${selector}" + delete_cluster_scoped "${selector}" +done + +kubectl delete mutatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false +kubectl delete validatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false diff --git a/infrastructure/cert-manager/helmrelease.yaml b/infrastructure/cert-manager/helmrelease.yaml new file mode 100644 index 0000000..0a0ed22 --- /dev/null +++ b/infrastructure/cert-manager/helmrelease.yaml @@ -0,0 +1,67 @@ +# infrastructure/cert-manager/helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: cert-manager + namespace: cert-manager +spec: + interval: 30m + chart: + spec: + chart: cert-manager + version: v1.17.0 + sourceRef: + kind: HelmRepository + name: jetstack + namespace: flux-system + install: + crds: CreateReplace + remediation: { retries: 3 } + timeout: 10m + upgrade: + crds: CreateReplace + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 10m + values: + installCRDs: true + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + webhook: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + cainjector: + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + 
values: + - rpi5 + - rpi4 diff --git a/infrastructure/cert-manager/kustomization.yaml b/infrastructure/cert-manager/kustomization.yaml new file mode 100644 index 0000000..dc9d06d --- /dev/null +++ b/infrastructure/cert-manager/kustomization.yaml @@ -0,0 +1,6 @@ +# infrastructure/cert-manager/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - helmrelease.yaml diff --git a/infrastructure/cert-manager/namespace.yaml b/infrastructure/cert-manager/namespace.yaml new file mode 100644 index 0000000..8a43590 --- /dev/null +++ b/infrastructure/cert-manager/namespace.yaml @@ -0,0 +1,5 @@ +# infrastructure/cert-manager/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: cert-manager diff --git a/infrastructure/core/coredns-custom.yaml b/infrastructure/core/coredns-custom.yaml new file mode 100644 index 0000000..8aeff14 --- /dev/null +++ b/infrastructure/core/coredns-custom.yaml @@ -0,0 +1,44 @@ +# infrastructure/core/coredns-custom.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: coredns-custom + namespace: kube-system +data: + bstein-dev.server: | + bstein.dev:53 { + errors + cache 30 + hosts { + 192.168.22.9 alerts.bstein.dev + 192.168.22.9 auth.bstein.dev + 192.168.22.9 bstein.dev + 10.43.6.87 budget.bstein.dev + 192.168.22.9 call.live.bstein.dev + 192.168.22.9 cd.bstein.dev + 192.168.22.9 chat.ai.bstein.dev + 192.168.22.9 ci.bstein.dev + 192.168.22.9 cloud.bstein.dev + 192.168.22.9 health.bstein.dev + 192.168.22.9 kit.live.bstein.dev + 192.168.22.9 live.bstein.dev + 192.168.22.9 logs.bstein.dev + 192.168.22.9 longhorn.bstein.dev + 192.168.22.4 mail.bstein.dev + 192.168.22.9 matrix.live.bstein.dev + 192.168.22.9 metrics.bstein.dev + 192.168.22.9 monero.bstein.dev + 10.43.6.87 money.bstein.dev + 192.168.22.9 notes.bstein.dev + 192.168.22.9 office.bstein.dev + 192.168.22.9 pegasus.bstein.dev + 192.168.22.9 registry.bstein.dev + 192.168.22.9 scm.bstein.dev + 192.168.22.9 
secret.bstein.dev + 192.168.22.9 sso.bstein.dev + 192.168.22.9 stream.bstein.dev + 192.168.22.9 tasks.bstein.dev + 192.168.22.9 vault.bstein.dev + fallthrough + } + } diff --git a/infrastructure/core/coredns-deployment.yaml b/infrastructure/core/coredns-deployment.yaml new file mode 100644 index 0000000..1e69eec --- /dev/null +++ b/infrastructure/core/coredns-deployment.yaml @@ -0,0 +1,141 @@ +# infrastructure/core/coredns-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: coredns + namespace: kube-system + labels: + k8s-app: kube-dns + kubernetes.io/name: CoreDNS +spec: + progressDeadlineSeconds: 600 + replicas: 2 + revisionHistoryLimit: 0 + selector: + matchLabels: + k8s-app: kube-dns + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 25% + maxUnavailable: 1 + template: + metadata: + labels: + k8s-app: kube-dns + spec: + containers: + - name: coredns + image: registry.bstein.dev/infra/coredns:1.12.1 + imagePullPolicy: IfNotPresent + args: + - -conf + - /etc/coredns/Corefile + ports: + - containerPort: 53 + name: dns + protocol: UDP + - containerPort: 53 + name: dns-tcp + protocol: TCP + - containerPort: 9153 + name: metrics + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: 8080 + scheme: HTTP + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /ready + port: 8181 + scheme: HTTP + periodSeconds: 2 + timeoutSeconds: 1 + successThreshold: 1 + failureThreshold: 3 + resources: + limits: + memory: 170Mi + requests: + cpu: 100m + memory: 70Mi + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - NET_BIND_SERVICE + drop: + - all + readOnlyRootFilesystem: true + volumeMounts: + - name: config-volume + mountPath: /etc/coredns + readOnly: true + - name: custom-config-volume + mountPath: /etc/coredns/custom + readOnly: true + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + 
nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + - key: node-role.kubernetes.io/worker + operator: In + values: + - "true" + dnsPolicy: Default + nodeSelector: + kubernetes.io/os: linux + priorityClassName: system-cluster-critical + restartPolicy: Always + schedulerName: default-scheduler + serviceAccountName: coredns + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: DoNotSchedule + labelSelector: + matchLabels: + k8s-app: kube-dns + - maxSkew: 1 + topologyKey: topology.kubernetes.io/zone + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + k8s-app: kube-dns + volumes: + - name: config-volume + configMap: + name: coredns + defaultMode: 420 + items: + - key: Corefile + path: Corefile + - key: NodeHosts + path: NodeHosts + - name: custom-config-volume + configMap: + name: coredns-custom + optional: true + defaultMode: 420 diff --git a/infrastructure/core/kustomization.yaml b/infrastructure/core/kustomization.yaml index 14d6a02..6286186 100644 --- a/infrastructure/core/kustomization.yaml +++ b/infrastructure/core/kustomization.yaml @@ -4,5 +4,7 @@ kind: Kustomization resources: - ../modules/base - ../modules/profiles/atlas-ha + - coredns-custom.yaml + - coredns-deployment.yaml - ../sources/cert-manager/letsencrypt.yaml - ../sources/cert-manager/letsencrypt-prod.yaml diff --git a/infrastructure/longhorn/adopt/kustomization.yaml b/infrastructure/longhorn/adopt/kustomization.yaml new file mode 100644 index 0000000..f70b223 --- /dev/null +++ b/infrastructure/longhorn/adopt/kustomization.yaml @@ -0,0 +1,15 @@ +# infrastructure/longhorn/adopt/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization 
+resources: + - namespace.yaml + - longhorn-adopt-rbac.yaml + - longhorn-helm-adopt-job.yaml + +configMapGenerator: + - name: longhorn-helm-adopt-script + namespace: longhorn-system + files: + - longhorn_helm_adopt.sh=scripts/longhorn_helm_adopt.sh + options: + disableNameSuffixHash: true diff --git a/infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml b/infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml new file mode 100644 index 0000000..31ea73b --- /dev/null +++ b/infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml @@ -0,0 +1,56 @@ +# infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: longhorn-helm-adopt + namespace: longhorn-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: longhorn-helm-adopt +rules: + - apiGroups: [""] + resources: + - configmaps + - services + - serviceaccounts + - secrets + verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["apps"] + resources: + - deployments + - daemonsets + verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["batch"] + resources: + - jobs + verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["rbac.authorization.k8s.io"] + resources: + - roles + - rolebindings + - clusterroles + - clusterrolebindings + verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["apiextensions.k8s.io"] + resources: + - customresourcedefinitions + verbs: ["get", "list", "watch", "patch", "update"] + - apiGroups: ["scheduling.k8s.io"] + resources: + - priorityclasses + verbs: ["get", "list", "watch", "patch", "update"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: longhorn-helm-adopt +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: longhorn-helm-adopt +subjects: + - kind: ServiceAccount + name: longhorn-helm-adopt + namespace: longhorn-system diff --git 
a/infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml b/infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml new file mode 100644 index 0000000..e1a520a --- /dev/null +++ b/infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml @@ -0,0 +1,40 @@ +# infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: longhorn-helm-adopt-2 + namespace: longhorn-system +spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: longhorn-helm-adopt + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + containers: + - name: adopt + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/longhorn_helm_adopt.sh"] + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + configMap: + name: longhorn-helm-adopt-script + defaultMode: 0555 diff --git a/infrastructure/longhorn/adopt/namespace.yaml b/infrastructure/longhorn/adopt/namespace.yaml new file mode 100644 index 0000000..8db20de --- /dev/null +++ b/infrastructure/longhorn/adopt/namespace.yaml @@ -0,0 +1,5 @@ +# infrastructure/longhorn/adopt/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: longhorn-system diff --git a/infrastructure/longhorn/adopt/scripts/longhorn_helm_adopt.sh b/infrastructure/longhorn/adopt/scripts/longhorn_helm_adopt.sh new file mode 100644 index 0000000..343ade8 --- /dev/null +++ b/infrastructure/longhorn/adopt/scripts/longhorn_helm_adopt.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +release_name="longhorn" +release_namespace="longhorn-system" 
+selector="app.kubernetes.io/instance=${release_name}" + +annotate_and_label() { + local scope="$1" + local kind="$2" + if [ "${scope}" = "namespaced" ]; then + kubectl -n "${release_namespace}" annotate "${kind}" -l "${selector}" \ + meta.helm.sh/release-name="${release_name}" \ + meta.helm.sh/release-namespace="${release_namespace}" \ + --overwrite >/dev/null 2>&1 || true + kubectl -n "${release_namespace}" label "${kind}" -l "${selector}" \ + app.kubernetes.io/managed-by=Helm --overwrite >/dev/null 2>&1 || true + else + kubectl annotate "${kind}" -l "${selector}" \ + meta.helm.sh/release-name="${release_name}" \ + meta.helm.sh/release-namespace="${release_namespace}" \ + --overwrite >/dev/null 2>&1 || true + kubectl label "${kind}" -l "${selector}" \ + app.kubernetes.io/managed-by=Helm --overwrite >/dev/null 2>&1 || true + fi +} + +namespaced_kinds=( + configmap + service + serviceaccount + deployment + daemonset + job + role + rolebinding +) + +cluster_kinds=( + clusterrole + clusterrolebinding + customresourcedefinition + priorityclass +) + +for kind in "${namespaced_kinds[@]}"; do + annotate_and_label "namespaced" "${kind}" +done + +for kind in "${cluster_kinds[@]}"; do + annotate_and_label "cluster" "${kind}" +done diff --git a/infrastructure/longhorn/core/helmrelease.yaml b/infrastructure/longhorn/core/helmrelease.yaml new file mode 100644 index 0000000..575f4bd --- /dev/null +++ b/infrastructure/longhorn/core/helmrelease.yaml @@ -0,0 +1,80 @@ +# infrastructure/longhorn/core/helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: longhorn + namespace: longhorn-system +spec: + interval: 30m + chart: + spec: + chart: longhorn + version: 1.8.2 + sourceRef: + kind: HelmRepository + name: longhorn + namespace: flux-system + install: + crds: Skip + remediation: { retries: 3 } + timeout: 15m + upgrade: + crds: Skip + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 15m + values: + 
service: + ui: + type: NodePort + nodePort: 30824 + privateRegistry: + createSecret: false + registrySecret: longhorn-registry + image: + pullPolicy: Always + longhorn: + engine: + repository: registry.bstein.dev/infra/longhorn-engine + tag: v1.8.2 + manager: + repository: registry.bstein.dev/infra/longhorn-manager + tag: v1.8.2 + ui: + repository: registry.bstein.dev/infra/longhorn-ui + tag: v1.8.2 + instanceManager: + repository: registry.bstein.dev/infra/longhorn-instance-manager + tag: v1.8.2 + shareManager: + repository: registry.bstein.dev/infra/longhorn-share-manager + tag: v1.8.2 + backingImageManager: + repository: registry.bstein.dev/infra/longhorn-backing-image-manager + tag: v1.8.2 + supportBundleKit: + repository: registry.bstein.dev/infra/longhorn-support-bundle-kit + tag: v0.0.56 + csi: + attacher: + repository: registry.bstein.dev/infra/longhorn-csi-attacher + tag: v4.9.0 + provisioner: + repository: registry.bstein.dev/infra/longhorn-csi-provisioner + tag: v5.3.0 + nodeDriverRegistrar: + repository: registry.bstein.dev/infra/longhorn-csi-node-driver-registrar + tag: v2.14.0 + resizer: + repository: registry.bstein.dev/infra/longhorn-csi-resizer + tag: v1.13.2 + snapshotter: + repository: registry.bstein.dev/infra/longhorn-csi-snapshotter + tag: v8.2.0 + livenessProbe: + repository: registry.bstein.dev/infra/longhorn-livenessprobe + tag: v2.16.0 + defaultSettings: + systemManagedPodsImagePullPolicy: Always diff --git a/infrastructure/longhorn/core/kustomization.yaml b/infrastructure/longhorn/core/kustomization.yaml new file mode 100644 index 0000000..deb5308 --- /dev/null +++ b/infrastructure/longhorn/core/kustomization.yaml @@ -0,0 +1,18 @@ +# infrastructure/longhorn/core/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - namespace.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml + - vault-sync-deployment.yaml + - helmrelease.yaml + - longhorn-settings-ensure-job.yaml + 
+configMapGenerator: + - name: longhorn-settings-ensure-script + files: + - longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh + +generatorOptions: + disableNameSuffixHash: true diff --git a/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml b/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml new file mode 100644 index 0000000..932c056 --- /dev/null +++ b/infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml @@ -0,0 +1,36 @@ +# infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: longhorn-settings-ensure-4 + namespace: longhorn-system +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + spec: + serviceAccountName: longhorn-service-account + restartPolicy: Never + volumes: + - name: longhorn-settings-ensure-script + configMap: + name: longhorn-settings-ensure-script + defaultMode: 0555 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: node-role.kubernetes.io/worker + operator: Exists + containers: + - name: apply + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/scripts/longhorn_settings_ensure.sh"] + volumeMounts: + - name: longhorn-settings-ensure-script + mountPath: /scripts + readOnly: true diff --git a/infrastructure/longhorn/core/namespace.yaml b/infrastructure/longhorn/core/namespace.yaml new file mode 100644 index 0000000..6b794fd --- /dev/null +++ b/infrastructure/longhorn/core/namespace.yaml @@ -0,0 +1,5 @@ +# infrastructure/longhorn/core/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: longhorn-system diff --git a/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh new file mode 100644 index 0000000..f13e87a --- /dev/null +++ 
b/infrastructure/longhorn/core/scripts/longhorn_settings_ensure.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env sh +set -eu + +# Longhorn blocks direct CR patches for some settings; use the internal API instead. + +api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings" + +wait_for_api() { + attempts=30 + while [ "${attempts}" -gt 0 ]; do + if curl -fsS "${api_base}" >/dev/null 2>&1; then + return 0 + fi + attempts=$((attempts - 1)) + sleep 2 + done + echo "Longhorn API not ready after retries." >&2 + return 1 +} + +update_setting() { + name="$1" + value="$2" + + current="$(curl -fsS "${api_base}/${name}" || true)" + if echo "${current}" | grep -Fq "\"value\":\"${value}\""; then + echo "Setting ${name} already set." + return 0 + fi + + echo "Setting ${name} -> ${value}" + curl -fsS -X PUT \ + -H "Content-Type: application/json" \ + -d "{\"value\":\"${value}\"}" \ + "${api_base}/${name}" >/dev/null +} + +wait_for_api +update_setting default-engine-image "registry.bstein.dev/infra/longhorn-engine:v1.8.2" +update_setting default-instance-manager-image "registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2" +update_setting default-backing-image-manager-image "registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2" +update_setting support-bundle-manager-image "registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56" diff --git a/infrastructure/longhorn/core/secretproviderclass.yaml b/infrastructure/longhorn/core/secretproviderclass.yaml new file mode 100644 index 0000000..031d1d8 --- /dev/null +++ b/infrastructure/longhorn/core/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# infrastructure/longhorn/core/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: longhorn-vault + namespace: longhorn-system +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "longhorn" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + 
secretPath: "kv/data/atlas/harbor-pull/longhorn" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: longhorn-registry + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/infrastructure/longhorn/core/vault-serviceaccount.yaml b/infrastructure/longhorn/core/vault-serviceaccount.yaml new file mode 100644 index 0000000..17ccef8 --- /dev/null +++ b/infrastructure/longhorn/core/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# infrastructure/longhorn/core/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: longhorn-vault-sync + namespace: longhorn-system diff --git a/infrastructure/longhorn/core/vault-sync-deployment.yaml b/infrastructure/longhorn/core/vault-sync-deployment.yaml new file mode 100644 index 0000000..95b159c --- /dev/null +++ b/infrastructure/longhorn/core/vault-sync-deployment.yaml @@ -0,0 +1,45 @@ +# infrastructure/longhorn/core/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: longhorn-vault-sync + namespace: longhorn-system +spec: + replicas: 1 + selector: + matchLabels: + app: longhorn-vault-sync + template: + metadata: + labels: + app: longhorn-vault-sync + spec: + serviceAccountName: longhorn-vault-sync + nodeSelector: + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 80 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5", "rpi4"] + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: longhorn-vault diff --git a/infrastructure/longhorn/ui-ingress/kustomization.yaml 
b/infrastructure/longhorn/ui-ingress/kustomization.yaml index a2ae5f3..40b030c 100644 --- a/infrastructure/longhorn/ui-ingress/kustomization.yaml +++ b/infrastructure/longhorn/ui-ingress/kustomization.yaml @@ -2,6 +2,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - serviceaccount.yaml + - oauth2-proxy-longhorn.yaml - middleware.yaml - ingress.yaml - - oauth2-proxy-longhorn.yaml diff --git a/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml index b8d4f34..a730e31 100644 --- a/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml +++ b/infrastructure/longhorn/ui-ingress/oauth2-proxy-longhorn.yaml @@ -32,7 +32,18 @@ spec: metadata: labels: app: oauth2-proxy-longhorn + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "longhorn" + vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/longhorn/oauth2-proxy" + vault.hashicorp.com/agent-inject-template-oidc-config: | + {{- with secret "kv/data/atlas/longhorn/oauth2-proxy" -}} + client_id = "{{ .Data.data.client_id }}" + client_secret = "{{ .Data.data.client_secret }}" + cookie_secret = "{{ .Data.data.cookie_secret }}" + {{- end -}} spec: + serviceAccountName: longhorn-vault nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ -50,6 +61,7 @@ spec: imagePullPolicy: IfNotPresent args: - --provider=oidc + - --config=/vault/secrets/oidc-config - --redirect-url=https://longhorn.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --scope=openid profile email groups @@ -69,22 +81,6 @@ spec: - --skip-jwt-bearer-tokens=true - --oidc-groups-claim=groups - --cookie-domain=longhorn.bstein.dev - env: - - name: OAUTH2_PROXY_CLIENT_ID - valueFrom: - secretKeyRef: - name: oauth2-proxy-longhorn-oidc - key: client_id - - name: OAUTH2_PROXY_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-longhorn-oidc - key: 
client_secret - - name: OAUTH2_PROXY_COOKIE_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-longhorn-oidc - key: cookie_secret ports: - containerPort: 4180 name: http diff --git a/infrastructure/longhorn/ui-ingress/serviceaccount.yaml b/infrastructure/longhorn/ui-ingress/serviceaccount.yaml new file mode 100644 index 0000000..310cb8a --- /dev/null +++ b/infrastructure/longhorn/ui-ingress/serviceaccount.yaml @@ -0,0 +1,6 @@ +# infrastructure/longhorn/ui-ingress/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: longhorn-vault + namespace: longhorn-system diff --git a/infrastructure/metallb/helmrelease.yaml b/infrastructure/metallb/helmrelease.yaml new file mode 100644 index 0000000..6298394 --- /dev/null +++ b/infrastructure/metallb/helmrelease.yaml @@ -0,0 +1,47 @@ +# infrastructure/metallb/helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: metallb + namespace: metallb-system +spec: + interval: 30m + chart: + spec: + chart: metallb + version: 0.15.3 + sourceRef: + kind: HelmRepository + name: metallb + namespace: flux-system + install: + crds: CreateReplace + remediation: { retries: 3 } + timeout: 10m + upgrade: + crds: CreateReplace + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 10m + values: + loadBalancerClass: metallb + prometheus: + metricsPort: 7472 + controller: + logLevel: info + webhookMode: enabled + tlsMinVersion: VersionTLS12 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi4 + - rpi5 + speaker: + logLevel: info diff --git a/infrastructure/metallb/kustomization.yaml b/infrastructure/metallb/kustomization.yaml index 1a1452c..bfc20a6 100644 --- a/infrastructure/metallb/kustomization.yaml +++ b/infrastructure/metallb/kustomization.yaml @@ -3,8 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization 
resources: - namespace.yaml - - metallb-rendered.yaml + - helmrelease.yaml - ippool.yaml -patchesStrategicMerge: - - patches/node-placement.yaml - - patches/speaker-loglevel.yaml diff --git a/infrastructure/metallb/metallb-rendered.yaml b/infrastructure/metallb/metallb-rendered.yaml deleted file mode 100644 index 0f8ad10..0000000 --- a/infrastructure/metallb/metallb-rendered.yaml +++ /dev/null @@ -1,2411 +0,0 @@ ---- -# Source: metallb/templates/service-accounts.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: metallb-controller - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: controller ---- -# Source: metallb/templates/service-accounts.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: metallb-speaker - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: speaker ---- -# Source: metallb/templates/webhooks.yaml -apiVersion: v1 -kind: Secret -metadata: - name: metallb-webhook-cert - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm ---- -# Source: metallb/templates/exclude-l2-config.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: metallb-excludel2 - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -data: - excludel2.yaml: | - announcedInterfacesToExclude: - - ^docker.* - - ^cbr.* - - ^dummy.* - - ^virbr.* - - 
^lxcbr.* - - ^veth.* - - ^lo$ - - ^cali.* - - ^tunl.* - - ^flannel.* - - ^kube-ipvs.* - - ^cni.* - - ^nodelocaldns.* - - ^lxc.* ---- -# Source: metallb/templates/speaker.yaml -# FRR expects to have these files owned by frr:frr on startup. -# Having them in a ConfigMap allows us to modify behaviors: for example enabling more daemons on startup. -apiVersion: v1 -kind: ConfigMap -metadata: - name: metallb-frr-startup - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: speaker -data: - daemons: | - # This file tells the frr package which daemons to start. - # - # Sample configurations for these daemons can be found in - # /usr/share/doc/frr/examples/. - # - # ATTENTION: - # - # When activating a daemon for the first time, a config file, even if it is - # empty, has to be present *and* be owned by the user and group "frr", else - # the daemon will not be started by /etc/init.d/frr. The permissions should - # be u=rw,g=r,o=. - # When using "vtysh" such a config file is also needed. It should be owned by - # group "frrvty" and set to ug=rw,o= though. Check /etc/pam.d/frr, too. - # - # The watchfrr and zebra daemons are always started. - # - bgpd=yes - ospfd=no - ospf6d=no - ripd=no - ripngd=no - isisd=no - pimd=no - ldpd=no - nhrpd=no - eigrpd=no - babeld=no - sharpd=no - pbrd=no - bfdd=yes - fabricd=no - vrrpd=no - - # - # If this option is set the /etc/init.d/frr script automatically loads - # the config via "vtysh -b" when the servers are started. - # Check /etc/pam.d/frr if you intend to use "vtysh"! 
- # - vtysh_enable=yes - zebra_options=" -A 127.0.0.1 -s 90000000 --limit-fds 100000" - bgpd_options=" -A 127.0.0.1 -p 0 --limit-fds 100000" - ospfd_options=" -A 127.0.0.1" - ospf6d_options=" -A ::1" - ripd_options=" -A 127.0.0.1" - ripngd_options=" -A ::1" - isisd_options=" -A 127.0.0.1" - pimd_options=" -A 127.0.0.1" - ldpd_options=" -A 127.0.0.1" - nhrpd_options=" -A 127.0.0.1" - eigrpd_options=" -A 127.0.0.1" - babeld_options=" -A 127.0.0.1" - sharpd_options=" -A 127.0.0.1" - pbrd_options=" -A 127.0.0.1" - staticd_options="-A 127.0.0.1 --limit-fds 100000" - bfdd_options=" -A 127.0.0.1 --limit-fds 100000" - fabricd_options="-A 127.0.0.1" - vrrpd_options=" -A 127.0.0.1" - - # configuration profile - # - #frr_profile="traditional" - #frr_profile="datacenter" - - # - # This is the maximum number of FD's that will be available. - # Upon startup this is read by the control files and ulimit - # is called. Uncomment and use a reasonable value for your - # setup if you are expecting a large number of peers in - # say BGP. - #MAX_FDS=1024 - - # The list of daemons to watch is automatically generated by the init script. - #watchfrr_options="" - - # for debugging purposes, you can specify a "wrap" command to start instead - # of starting the daemon directly, e.g. to use valgrind on ospfd: - # ospfd_wrap="/usr/bin/valgrind" - # or you can use "all_wrap" for all daemons, e.g. to use perf record: - # all_wrap="/usr/bin/perf record --call-graph -" - # the normal daemon command is added to this at the end. - vtysh.conf: |+ - service integrated-vtysh-config - frr.conf: |+ - ! This file gets overriden the first time the speaker renders a config. - ! So anything configured here is only temporary. 
- frr version 8.0 - frr defaults traditional - hostname Router - line vty - log file /etc/frr/frr.log informational ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: bfdprofiles.metallb.io -spec: - group: metallb.io - names: - kind: BFDProfile - listKind: BFDProfileList - plural: bfdprofiles - singular: bfdprofile - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .spec.passiveMode - name: Passive Mode - type: boolean - - jsonPath: .spec.transmitInterval - name: Transmit Interval - type: integer - - jsonPath: .spec.receiveInterval - name: Receive Interval - type: integer - - jsonPath: .spec.detectMultiplier - name: Multiplier - type: integer - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - BFDProfile represents the settings of the bfd session that can be - optionally associated with a BGP session. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: BFDProfileSpec defines the desired state of BFDProfile. - properties: - detectMultiplier: - description: |- - Configures the detection multiplier to determine - packet loss. 
The remote transmission interval will be multiplied - by this value to determine the connection loss detection timer. - format: int32 - maximum: 255 - minimum: 2 - type: integer - echoInterval: - description: |- - Configures the minimal echo receive transmission - interval that this system is capable of handling in milliseconds. - Defaults to 50ms - format: int32 - maximum: 60000 - minimum: 10 - type: integer - echoMode: - description: |- - Enables or disables the echo transmission mode. - This mode is disabled by default, and not supported on multi - hops setups. - type: boolean - minimumTtl: - description: |- - For multi hop sessions only: configure the minimum - expected TTL for an incoming BFD control packet. - format: int32 - maximum: 254 - minimum: 1 - type: integer - passiveMode: - description: |- - Mark session as passive: a passive session will not - attempt to start the connection and will wait for control packets - from peer before it begins replying. - type: boolean - receiveInterval: - description: |- - The minimum interval that this system is capable of - receiving control packets in milliseconds. - Defaults to 300ms. - format: int32 - maximum: 60000 - minimum: 10 - type: integer - transmitInterval: - description: |- - The minimum transmission interval (less jitter) - that this system wants to use to send BFD control packets in - milliseconds. Defaults to 300ms - format: int32 - maximum: 60000 - minimum: 10 - type: integer - type: object - status: - description: BFDProfileStatus defines the observed state of BFDProfile. 
- type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: bgpadvertisements.metallb.io -spec: - group: metallb.io - names: - kind: BGPAdvertisement - listKind: BGPAdvertisementList - plural: bgpadvertisements - singular: bgpadvertisement - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .spec.ipAddressPools - name: IPAddressPools - type: string - - jsonPath: .spec.ipAddressPoolSelectors - name: IPAddressPool Selectors - type: string - - jsonPath: .spec.peers - name: Peers - type: string - - jsonPath: .spec.nodeSelectors - name: Node Selectors - priority: 10 - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - BGPAdvertisement allows to advertise the IPs coming - from the selected IPAddressPools via BGP, setting the parameters of the - BGP Advertisement. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: BGPAdvertisementSpec defines the desired state of BGPAdvertisement. 
- properties: - aggregationLength: - default: 32 - description: The aggregation-length advertisement option lets you “roll up” the /32s into a larger prefix. Defaults to 32. Works for IPv4 addresses. - format: int32 - minimum: 1 - type: integer - aggregationLengthV6: - default: 128 - description: The aggregation-length advertisement option lets you “roll up” the /128s into a larger prefix. Defaults to 128. Works for IPv6 addresses. - format: int32 - type: integer - communities: - description: |- - The BGP communities to be associated with the announcement. Each item can be a standard community of the - form 1234:1234, a large community of the form large:1234:1234:1234 or the name of an alias defined in the - Community CRD. - items: - type: string - type: array - ipAddressPoolSelectors: - description: |- - A selector for the IPAddressPools which would get advertised via this advertisement. - If no IPAddressPool is selected by this or by the list, the advertisement is applied to all the IPAddressPools. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. 
If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - ipAddressPools: - description: The list of IPAddressPools to advertise via this advertisement, selected by name. - items: - type: string - type: array - localPref: - description: |- - The BGP LOCAL_PREF attribute which is used by BGP best path algorithm, - Path with higher localpref is preferred over one with lower localpref. - format: int32 - type: integer - nodeSelectors: - description: NodeSelectors allows to limit the nodes to announce as next hops for the LoadBalancer IP. When empty, all the nodes having are announced as next hops. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. 
- Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - peers: - description: |- - Peers limits the bgppeer to advertise the ips of the selected pools to. - When empty, the loadbalancer IP is announced to all the BGPPeers configured. - items: - type: string - type: array - type: object - status: - description: BGPAdvertisementStatus defines the observed state of BGPAdvertisement. 
- type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: bgppeers.metallb.io -spec: - conversion: - strategy: Webhook - webhook: - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /convert - conversionReviewVersions: - - v1beta1 - - v1beta2 - group: metallb.io - names: - kind: BGPPeer - listKind: BGPPeerList - plural: bgppeers - singular: bgppeer - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .spec.peerAddress - name: Address - type: string - - jsonPath: .spec.peerASN - name: ASN - type: string - - jsonPath: .spec.bfdProfile - name: BFD Profile - type: string - - jsonPath: .spec.ebgpMultiHop - name: Multi Hops - type: string - deprecated: true - deprecationWarning: v1beta1 is deprecated, please use v1beta2 - name: v1beta1 - schema: - openAPIV3Schema: - description: BGPPeer is the Schema for the peers API. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: BGPPeerSpec defines the desired state of Peer. 
- properties: - bfdProfile: - type: string - ebgpMultiHop: - description: EBGP peer is multi-hops away - type: boolean - holdTime: - description: Requested BGP hold time, per RFC4271. - type: string - keepaliveTime: - description: Requested BGP keepalive time, per RFC4271. - type: string - myASN: - description: AS number to use for the local end of the session. - format: int32 - maximum: 4294967295 - minimum: 0 - type: integer - nodeSelectors: - description: |- - Only connect to this peer on nodes that match one of these - selectors. - items: - properties: - matchExpressions: - items: - properties: - key: - type: string - operator: - type: string - values: - items: - type: string - minItems: 1 - type: array - required: - - key - - operator - - values - type: object - type: array - matchLabels: - additionalProperties: - type: string - type: object - type: object - type: array - password: - description: Authentication password for routers enforcing TCP MD5 authenticated sessions - type: string - peerASN: - description: AS number to expect from the remote end of the session. - format: int32 - maximum: 4294967295 - minimum: 0 - type: integer - peerAddress: - description: Address to dial when establishing the session. - type: string - peerPort: - description: Port to dial when establishing the session. - maximum: 16384 - minimum: 0 - type: integer - routerID: - description: BGP router ID to advertise to the peer - type: string - sourceAddress: - description: Source address to use when establishing the session. - type: string - required: - - myASN - - peerASN - - peerAddress - type: object - status: - description: BGPPeerStatus defines the observed state of Peer. 
- type: object - type: object - served: true - storage: false - subresources: - status: {} - - additionalPrinterColumns: - - jsonPath: .spec.peerAddress - name: Address - type: string - - jsonPath: .spec.peerASN - name: ASN - type: string - - jsonPath: .spec.bfdProfile - name: BFD Profile - type: string - - jsonPath: .spec.ebgpMultiHop - name: Multi Hops - type: string - name: v1beta2 - schema: - openAPIV3Schema: - description: BGPPeer is the Schema for the peers API. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: BGPPeerSpec defines the desired state of Peer. - properties: - bfdProfile: - description: The name of the BFD Profile to be used for the BFD session associated to the BGP session. If not set, the BFD session won't be set up. - type: string - connectTime: - description: Requested BGP connect time, controls how long BGP waits between connection attempts to a neighbor. 
- type: string - x-kubernetes-validations: - - message: connect time should be between 1 seconds to 65535 - rule: duration(self).getSeconds() >= 1 && duration(self).getSeconds() <= 65535 - - message: connect time should contain a whole number of seconds - rule: duration(self).getMilliseconds() % 1000 == 0 - disableMP: - default: false - description: |- - To set if we want to disable MP BGP that will separate IPv4 and IPv6 route exchanges into distinct BGP sessions. - Deprecated: DisableMP is deprecated in favor of dualStackAddressFamily. - type: boolean - dualStackAddressFamily: - default: false - description: |- - To set if we want to enable the neighbor not only for the ipfamily related to its session, - but also the other one. This allows to advertise/receive IPv4 prefixes over IPv6 sessions and vice versa. - type: boolean - dynamicASN: - description: |- - DynamicASN detects the AS number to use for the remote end of the session - without explicitly setting it via the ASN field. Limited to: - internal - if the neighbor's ASN is different than MyASN connection is denied. - external - if the neighbor's ASN is the same as MyASN the connection is denied. - ASN and DynamicASN are mutually exclusive and one of them must be specified. - enum: - - internal - - external - type: string - ebgpMultiHop: - description: To set if the BGPPeer is multi-hops away. Needed for FRR mode only. - type: boolean - enableGracefulRestart: - description: |- - EnableGracefulRestart allows BGP peer to continue to forward data packets - along known routes while the routing protocol information is being - restored. This field is immutable because it requires restart of the BGP - session. Supported for FRR mode only. - type: boolean - x-kubernetes-validations: - - message: EnableGracefulRestart cannot be changed after creation - rule: self == oldSelf - holdTime: - description: Requested BGP hold time, per RFC4271. 
- type: string - interface: - description: |- - Interface is the node interface over which the unnumbered BGP peering will - be established. No API validation takes place as that string value - represents an interface name on the host and if user provides an invalid - value, only the actual BGP session will not be established. - Address and Interface are mutually exclusive and one of them must be specified. - type: string - keepaliveTime: - description: Requested BGP keepalive time, per RFC4271. - type: string - myASN: - description: AS number to use for the local end of the session. - format: int32 - maximum: 4294967295 - minimum: 0 - type: integer - nodeSelectors: - description: |- - Only connect to this peer on nodes that match one of these - selectors. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. 
- items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - password: - description: Authentication password for routers enforcing TCP MD5 authenticated sessions - type: string - passwordSecret: - description: |- - passwordSecret is name of the authentication secret for BGP Peer. - the secret must be of type "kubernetes.io/basic-auth", and created in the - same namespace as the MetalLB deployment. The password is stored in the - secret as the key "password". - properties: - name: - description: name is unique within a namespace to reference a secret resource. - type: string - namespace: - description: namespace defines the space within which the secret name must be unique. - type: string - type: object - x-kubernetes-map-type: atomic - peerASN: - description: |- - AS number to expect from the remote end of the session. - ASN and DynamicASN are mutually exclusive and one of them must be specified. - format: int32 - maximum: 4294967295 - minimum: 0 - type: integer - peerAddress: - description: Address to dial when establishing the session. - type: string - peerPort: - default: 179 - description: Port to dial when establishing the session. - maximum: 16384 - minimum: 1 - type: integer - routerID: - description: BGP router ID to advertise to the peer - type: string - sourceAddress: - description: Source address to use when establishing the session. 
- type: string - vrf: - description: |- - To set if we want to peer with the BGPPeer using an interface belonging to - a host vrf - type: string - required: - - myASN - type: object - status: - description: BGPPeerStatus defines the observed state of Peer. - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: communities.metallb.io -spec: - group: metallb.io - names: - kind: Community - listKind: CommunityList - plural: communities - singular: community - scope: Namespaced - versions: - - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - Community is a collection of aliases for communities. - Users can define named aliases to be used in the BGPPeer CRD. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: CommunitySpec defines the desired state of Community. - properties: - communities: - items: - properties: - name: - description: The name of the alias for the community. - type: string - value: - description: |- - The BGP community value corresponding to the given name. 
Can be a standard community of the form 1234:1234 - or a large community of the form large:1234:1234:1234. - type: string - type: object - type: array - type: object - status: - description: CommunityStatus defines the observed state of Community. - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: configurationstates.metallb.io -spec: - group: metallb.io - names: - kind: ConfigurationState - listKind: ConfigurationStateList - plural: configurationstates - singular: configurationstate - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.result - name: Result - type: string - - jsonPath: .status.errorSummary - name: ErrorSummary - type: string - - jsonPath: .metadata.creationTimestamp - name: Age - type: date - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - ConfigurationState is a status-only CRD that reports configuration validation results from MetalLB components. - Labels: - - metallb.io/component-type: "controller" or "speaker" - - metallb.io/node-name: node name (only for speaker) - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - status: - description: ConfigurationStateStatus defines the observed state of ConfigurationState. - properties: - conditions: - description: Conditions contains the status conditions from the reconcilers running in this component. - items: - description: Condition contains details for one aspect of the current state of this API Resource. - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. 
- enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - x-kubernetes-list-map-keys: - - type - x-kubernetes-list-type: map - errorSummary: - description: |- - ErrorSummary contains the aggregated error messages from reconciliation failures. - This field is empty when Result is "Valid". - type: string - result: - description: Result indicates the configuration validation result. - enum: - - Valid - - Invalid - - Unknown - type: string - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: ipaddresspools.metallb.io -spec: - group: metallb.io - names: - kind: IPAddressPool - listKind: IPAddressPoolList - plural: ipaddresspools - singular: ipaddresspool - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .spec.autoAssign - name: Auto Assign - type: boolean - - jsonPath: .spec.avoidBuggyIPs - name: Avoid Buggy IPs - type: boolean - - jsonPath: .spec.addresses - name: Addresses - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - IPAddressPool represents a pool of IP addresses that can be allocated - to LoadBalancer services. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: IPAddressPoolSpec defines the desired state of IPAddressPool. - properties: - addresses: - description: |- - A list of IP address ranges over which MetalLB has authority. - You can list multiple ranges in a single pool, they will all share the - same settings. Each range can be either a CIDR prefix, or an explicit - start-end range of IPs. - items: - type: string - type: array - autoAssign: - default: true - description: |- - AutoAssign flag used to prevent MetallB from automatic allocation - for a pool. - type: boolean - avoidBuggyIPs: - default: false - description: |- - AvoidBuggyIPs prevents addresses ending with .0 and .255 - to be used by a pool. - type: boolean - serviceAllocation: - description: |- - AllocateTo makes ip pool allocation to specific namespace and/or service. - The controller will use the pool with lowest value of priority in case of - multiple matches. A pool with no priority set will be used only if the - pools with priority can't be used. If multiple matching IPAddressPools are - available it will check for the availability of IPs sorting the matching - IPAddressPools by priority, starting from the highest to the lowest. If - multiple IPAddressPools have the same priority, choice will be random. - properties: - namespaceSelectors: - description: |- - NamespaceSelectors list of label selectors to select namespace(s) for ip pool, - an alternative to using namespace list. 
- items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - namespaces: - description: Namespaces list of namespace(s) on which ip pool can be attached. - items: - type: string - type: array - priority: - description: Priority priority given for ip pool while ip allocation on a service. 
- type: integer - serviceSelectors: - description: |- - ServiceSelectors list of label selector to select service(s) for which ip pool - can be used for ip allocation. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - type: object - required: - - addresses - type: object - status: - description: IPAddressPoolStatus defines the observed state of IPAddressPool. 
- properties: - assignedIPv4: - description: AssignedIPv4 is the number of assigned IPv4 addresses. - format: int64 - type: integer - assignedIPv6: - description: AssignedIPv6 is the number of assigned IPv6 addresses. - format: int64 - type: integer - availableIPv4: - description: AvailableIPv4 is the number of available IPv4 addresses. - format: int64 - type: integer - availableIPv6: - description: AvailableIPv6 is the number of available IPv6 addresses. - format: int64 - type: integer - required: - - assignedIPv4 - - assignedIPv6 - - availableIPv4 - - availableIPv6 - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: l2advertisements.metallb.io -spec: - group: metallb.io - names: - kind: L2Advertisement - listKind: L2AdvertisementList - plural: l2advertisements - singular: l2advertisement - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .spec.ipAddressPools - name: IPAddressPools - type: string - - jsonPath: .spec.ipAddressPoolSelectors - name: IPAddressPool Selectors - type: string - - jsonPath: .spec.interfaces - name: Interfaces - type: string - - jsonPath: .spec.nodeSelectors - name: Node Selectors - priority: 10 - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: |- - L2Advertisement allows to advertise the LoadBalancer IPs provided - by the selected pools via L2. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: L2AdvertisementSpec defines the desired state of L2Advertisement. - properties: - interfaces: - description: |- - A list of interfaces to announce from. The LB IP will be announced only from these interfaces. - If the field is not set, we advertise from all the interfaces on the host. - items: - type: string - type: array - ipAddressPoolSelectors: - description: |- - A selector for the IPAddressPools which would get advertised via this advertisement. - If no IPAddressPool is selected by this or by the list, the advertisement is applied to all the IPAddressPools. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - ipAddressPools: - description: The list of IPAddressPools to advertise via this advertisement, selected by name. - items: - type: string - type: array - nodeSelectors: - description: NodeSelectors allows to limit the nodes to announce as next hops for the LoadBalancer IP. When empty, all the nodes having are announced as next hops. - items: - description: |- - A label selector is a label query over a set of resources. The result of matchLabels and - matchExpressions are ANDed. An empty label selector matches all objects. A null - label selector matches no objects. - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: |- - A label selector requirement is a selector that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: |- - operator represents a key's relationship to a set of values. - Valid operators are In, NotIn, Exists and DoesNotExist. - type: string - values: - description: |- - values is an array of string values. 
If the operator is In or NotIn, - the values array must be non-empty. If the operator is Exists or DoesNotExist, - the values array must be empty. This array is replaced during a strategic - merge patch. - items: - type: string - type: array - x-kubernetes-list-type: atomic - required: - - key - - operator - type: object - type: array - x-kubernetes-list-type: atomic - matchLabels: - additionalProperties: - type: string - description: |- - matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels - map is equivalent to an element of matchExpressions, whose key field is "key", the - operator is "In", and the values array contains only "value". The requirements are ANDed. - type: object - type: object - x-kubernetes-map-type: atomic - type: array - type: object - status: - description: L2AdvertisementStatus defines the observed state of L2Advertisement. - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: servicebgpstatuses.metallb.io -spec: - group: metallb.io - names: - kind: ServiceBGPStatus - listKind: ServiceBGPStatusList - plural: servicebgpstatuses - singular: servicebgpstatus - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.node - name: Node - type: string - - jsonPath: .status.serviceName - name: Service Name - type: string - - jsonPath: .status.serviceNamespace - name: Service Namespace - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: ServiceBGPStatus exposes the BGP peers a service is configured to be advertised to, per relevant node. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. 
- Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: ServiceBGPStatusSpec defines the desired state of ServiceBGPStatus. - type: object - status: - description: MetalLBServiceBGPStatus defines the observed state of ServiceBGPStatus. - properties: - node: - description: Node indicates the node announcing the service. - type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - peers: - description: |- - Peers indicate the BGP peers for which the service is configured to be advertised to. - The service being actually advertised to a given peer depends on the session state and is not indicated here. - items: - type: string - type: array - serviceName: - description: ServiceName indicates the service this status represents. - type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - serviceNamespace: - description: ServiceNamespace indicates the namespace of the service. 
- type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/charts/crds/templates/crds.yaml -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.19.0 - name: servicel2statuses.metallb.io -spec: - group: metallb.io - names: - kind: ServiceL2Status - listKind: ServiceL2StatusList - plural: servicel2statuses - singular: servicel2status - scope: Namespaced - versions: - - additionalPrinterColumns: - - jsonPath: .status.node - name: Allocated Node - type: string - - jsonPath: .status.serviceName - name: Service Name - type: string - - jsonPath: .status.serviceNamespace - name: Service Namespace - type: string - name: v1beta1 - schema: - openAPIV3Schema: - description: ServiceL2Status reveals the actual traffic status of loadbalancer services in layer2 mode. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: ServiceL2StatusSpec defines the desired state of ServiceL2Status. - type: object - status: - description: MetalLBServiceL2Status defines the observed state of ServiceL2Status. 
- properties: - interfaces: - description: Interfaces indicates the interfaces that receive the directed traffic - items: - description: InterfaceInfo defines interface info of layer2 announcement. - properties: - name: - description: Name the name of network interface card - type: string - type: object - type: array - node: - description: Node indicates the node that receives the directed traffic - type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - serviceName: - description: ServiceName indicates the service this status represents - type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - serviceNamespace: - description: ServiceNamespace indicates the namespace of the service - type: string - x-kubernetes-validations: - - message: Value is immutable - rule: self == oldSelf - type: object - type: object - served: true - storage: true - subresources: - status: {} ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: metallb:controller - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -rules: -- apiGroups: [""] - resources: ["services", "namespaces"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["nodes"] - verbs: ["list"] -- apiGroups: [""] - resources: ["services/status"] - verbs: ["update"] -- apiGroups: [""] - resources: ["events"] - verbs: ["create", "patch"] -- apiGroups: ["admissionregistration.k8s.io"] - resources: ["validatingwebhookconfigurations"] - resourceNames: ["metallb-webhook-configuration"] - verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] -- apiGroups: ["admissionregistration.k8s.io"] - resources: ["validatingwebhookconfigurations"] - verbs: ["list", "watch"] -- apiGroups: ["apiextensions.k8s.io"] - resources: 
["customresourcedefinitions"] - resourceNames: ["bfdprofiles.metallb.io","bgpadvertisements.metallb.io", - "bgppeers.metallb.io","ipaddresspools.metallb.io","l2advertisements.metallb.io","communities.metallb.io","configurationstates.metallb.io"] - verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] -- apiGroups: ["apiextensions.k8s.io"] - resources: ["customresourcedefinitions"] - verbs: ["list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["configurationstates"] - verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] -- apiGroups: ["metallb.io"] - resources: ["configurationstates/status"] - verbs: ["get", "patch", "update"] ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: metallb:speaker - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -rules: -- apiGroups: [""] - resources: ["services", "endpoints", "nodes", "namespaces"] - verbs: ["get", "list", "watch"] -- apiGroups: ["discovery.k8s.io"] - resources: ["endpointslices"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["events"] - verbs: ["create", "patch"] -- apiGroups: ["metallb.io"] - resources: ["servicel2statuses","servicel2statuses/status","configurationstates","configurationstates/status"] - verbs: ["*"] ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: metallb:controller - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -subjects: -- kind: ServiceAccount - name: metallb-controller - namespace: metallb-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: metallb:controller ---- -# Source: 
metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: metallb:speaker - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -subjects: -- kind: ServiceAccount - name: metallb-speaker - namespace: metallb-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: metallb:speaker ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: metallb-pod-lister - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -rules: -- apiGroups: [""] - resources: ["pods"] - verbs: ["list", "get"] -- apiGroups: [""] - resources: ["secrets"] - verbs: ["get", "list", "watch"] -- apiGroups: [""] - resources: ["configmaps"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["bfdprofiles"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["bgppeers"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["l2advertisements"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["bgpadvertisements"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["ipaddresspools"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["communities"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["servicebgpstatuses","servicebgpstatuses/status"] - verbs: ["*"] ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: metallb-controller - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: 
metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -rules: -- apiGroups: [""] - resources: ["secrets"] - verbs: ["create", "get", "list", "watch"] -- apiGroups: [""] - resources: ["secrets"] - resourceNames: ["metallb-memberlist"] - verbs: ["list"] -- apiGroups: ["apps"] - resources: ["deployments"] - resourceNames: ["metallb-controller"] - verbs: ["get"] -- apiGroups: [""] - resources: ["secrets"] - verbs: ["create", "delete", "get", "list", "patch", "update", "watch"] -- apiGroups: ["metallb.io"] - resources: ["ipaddresspools"] - verbs: ["get", "list", "watch"] -- apiGroups: ["metallb.io"] - resources: ["ipaddresspools/status"] - verbs: ["update"] -- apiGroups: ["metallb.io"] - resources: ["bgppeers"] - verbs: ["get", "list"] -- apiGroups: ["metallb.io"] - resources: ["bgpadvertisements"] - verbs: ["get", "list"] -- apiGroups: ["metallb.io"] - resources: ["l2advertisements"] - verbs: ["get", "list"] -- apiGroups: ["metallb.io"] - resources: ["communities"] - verbs: ["get", "list","watch"] -- apiGroups: ["metallb.io"] - resources: ["bfdprofiles"] - verbs: ["get", "list","watch"] ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: metallb-pod-lister - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: metallb-pod-lister -subjects: -- kind: ServiceAccount - name: metallb-speaker ---- -# Source: metallb/templates/rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: metallb-controller - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: 
"v0.15.3" - app.kubernetes.io/managed-by: Helm -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: metallb-controller -subjects: -- kind: ServiceAccount - name: metallb-controller ---- -# Source: metallb/templates/webhooks.yaml -apiVersion: v1 -kind: Service -metadata: - name: metallb-webhook-service - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm -spec: - ports: - - port: 443 - targetPort: 9443 - selector: - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/component: controller ---- -# Source: metallb/templates/speaker.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: metallb-speaker - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: speaker -spec: - updateStrategy: - type: RollingUpdate - selector: - matchLabels: - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/component: speaker - template: - metadata: - labels: - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/component: speaker - spec: - serviceAccountName: metallb-speaker - terminationGracePeriodSeconds: 0 - hostNetwork: true - volumes: - - name: memberlist - secret: - secretName: metallb-memberlist - defaultMode: 420 - - name: metallb-excludel2 - configMap: - defaultMode: 256 - name: metallb-excludel2 - - name: frr-sockets - emptyDir: {} - - name: frr-startup - configMap: - name: metallb-frr-startup - - name: frr-conf - emptyDir: {} - - name: reloader - emptyDir: {} - - name: metrics - emptyDir: {} - - name: frr-tmp - emptyDir: {} - - name: frr-lib - emptyDir: {} - - name: frr-log - 
emptyDir: {} - initContainers: - # Copies the initial config files with the right permissions to the shared volume. - - name: cp-frr-files - image: quay.io/frrouting/frr:10.4.1 - securityContext: - runAsUser: 100 - runAsGroup: 101 - command: ["/bin/sh", "-c", "cp -rLf /tmp/frr/* /etc/frr/"] - volumeMounts: - - name: frr-startup - mountPath: /tmp/frr - - name: frr-conf - mountPath: /etc/frr - # Copies the reloader to the shared volume between the speaker and reloader. - - name: cp-reloader - image: quay.io/metallb/speaker:v0.15.3 - command: ["/cp-tool","/frr-reloader.sh","/etc/frr_reloader/frr-reloader.sh"] - volumeMounts: - - name: reloader - mountPath: /etc/frr_reloader - # Copies the metrics exporter - - name: cp-metrics - image: quay.io/metallb/speaker:v0.15.3 - command: ["/cp-tool","/frr-metrics","/etc/frr_metrics/frr-metrics"] - volumeMounts: - - name: metrics - mountPath: /etc/frr_metrics - shareProcessNamespace: true - containers: - - name: speaker - image: quay.io/metallb/speaker:v0.15.3 - args: - - --port=7472 - - --log-level=info - env: - - name: METALLB_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: METALLB_HOST - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: METALLB_ML_BIND_ADDR - valueFrom: - fieldRef: - fieldPath: status.podIP - - - name: METALLB_ML_LABELS - value: "app.kubernetes.io/name=metallb,app.kubernetes.io/component=speaker" - - name: METALLB_ML_BIND_PORT - value: "7946" - - name: METALLB_ML_SECRET_KEY_PATH - value: "/etc/ml_secret_key" - - name: FRR_CONFIG_FILE - value: /etc/frr_reloader/frr.conf - - name: FRR_RELOADER_PID_FILE - value: /etc/frr_reloader/reloader.pid - - name: METALLB_BGP_TYPE - value: frr - - name: METALLB_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - ports: - - name: monitoring - containerPort: 7472 - - name: memberlist-tcp - containerPort: 7946 - protocol: TCP - - name: memberlist-udp - containerPort: 7946 - protocol: UDP - livenessProbe: - httpGet: - path: /metrics - 
port: monitoring - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - readinessProbe: - httpGet: - path: /metrics - port: monitoring - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - securityContext: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: - drop: - - ALL - add: - - NET_RAW - volumeMounts: - - name: memberlist - mountPath: /etc/ml_secret_key - - name: reloader - mountPath: /etc/frr_reloader - - name: metallb-excludel2 - mountPath: /etc/metallb - - name: frr - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - add: - - NET_ADMIN - - NET_RAW - - SYS_ADMIN - - NET_BIND_SERVICE - image: quay.io/frrouting/frr:10.4.1 - env: - - name: TINI_SUBREAPER - value: "true" - volumeMounts: - - name: frr-sockets - mountPath: /var/run/frr - - name: frr-conf - mountPath: /etc/frr - - name: frr-tmp - mountPath: /var/tmp/frr - - name: frr-lib - mountPath: /var/lib/frr - # The command is FRR's default entrypoint & waiting for the log file to appear and tailing it. - # If the log file isn't created in 60 seconds the tail fails and the container is restarted. - # This workaround is needed to have the frr logs as part of kubectl logs -c frr < speaker_pod_name >. 
- command: - - /bin/sh - - -c - - | - /sbin/tini -- /usr/lib/frr/docker-start & - attempts=0 - until [[ -f /etc/frr/frr.log || $attempts -eq 60 ]]; do - sleep 1 - attempts=$(( $attempts + 1 )) - done - tail -f /etc/frr/frr.log - livenessProbe: - httpGet: - path: livez - port: 7473 - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - startupProbe: - httpGet: - path: /livez - port: 7473 - failureThreshold: 30 - periodSeconds: 5 - - name: reloader - image: quay.io/frrouting/frr:10.4.1 - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - command: ["/etc/frr_reloader/frr-reloader.sh"] - volumeMounts: - - name: frr-sockets - mountPath: /var/run/frr - - name: frr-conf - mountPath: /etc/frr - - name: reloader - mountPath: /etc/frr_reloader - - name: frr-log - mountPath: /var/log/frr - - name: frr-metrics - image: quay.io/frrouting/frr:10.4.1 - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - command: ["/etc/frr_metrics/frr-metrics"] - args: - - --metrics-port=7473 - env: - - name: VTYSH_HISTFILE - value: /dev/null - ports: - - containerPort: 7473 - name: frrmetrics - volumeMounts: - - name: frr-sockets - mountPath: /var/run/frr - - name: frr-conf - mountPath: /etc/frr - - name: metrics - mountPath: /etc/frr_metrics - nodeSelector: - "kubernetes.io/os": linux - tolerations: - - key: node-role.kubernetes.io/master - effect: NoSchedule - operator: Exists - - key: node-role.kubernetes.io/control-plane - effect: NoSchedule - operator: Exists ---- -# Source: metallb/templates/controller.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: metallb-controller - namespace: "metallb-system" - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: controller -spec: - strategy: - type: 
RollingUpdate - selector: - matchLabels: - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/component: controller - template: - metadata: - labels: - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/component: controller - spec: - serviceAccountName: metallb-controller - terminationGracePeriodSeconds: 0 - securityContext: - fsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - containers: - - name: controller - image: quay.io/metallb/controller:v0.15.3 - args: - - --port=7472 - - --log-level=info - - --webhook-mode=enabled - - --tls-min-version=VersionTLS12 - env: - - name: METALLB_ML_SECRET_NAME - value: metallb-memberlist - - name: METALLB_DEPLOYMENT - value: metallb-controller - - name: METALLB_BGP_TYPE - value: frr - ports: - - name: monitoring - containerPort: 7472 - - containerPort: 9443 - name: webhook-server - protocol: TCP - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - livenessProbe: - httpGet: - path: /metrics - port: monitoring - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - readinessProbe: - httpGet: - path: /metrics - port: monitoring - initialDelaySeconds: 10 - periodSeconds: 10 - timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - securityContext: - allowPrivilegeEscalation: false - readOnlyRootFilesystem: true - capabilities: - drop: - - ALL - nodeSelector: - "kubernetes.io/os": linux - volumes: - - name: cert - secret: - defaultMode: 420 - secretName: metallb-webhook-cert ---- -# Source: metallb/templates/webhooks.yaml -apiVersion: admissionregistration.k8s.io/v1 -kind: ValidatingWebhookConfiguration -metadata: - name: metallb-webhook-configuration - labels: - helm.sh/chart: metallb-0.15.3 - app.kubernetes.io/name: metallb - app.kubernetes.io/instance: metallb - app.kubernetes.io/version: "v0.15.3" - app.kubernetes.io/managed-by: Helm 
-webhooks: -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta2-bgppeer - failurePolicy: Fail - name: bgppeervalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta2 - operations: - - CREATE - - UPDATE - resources: - - bgppeers - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta1-ipaddresspool - failurePolicy: Fail - name: ipaddresspoolvalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta1 - operations: - - CREATE - - UPDATE - resources: - - ipaddresspools - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta1-bgpadvertisement - failurePolicy: Fail - name: bgpadvertisementvalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta1 - operations: - - CREATE - - UPDATE - resources: - - bgpadvertisements - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta1-community - failurePolicy: Fail - name: communityvalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta1 - operations: - - CREATE - - UPDATE - resources: - - communities - sideEffects: None -- admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta1-bfdprofile - failurePolicy: Fail - name: bfdprofilevalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta1 - operations: - - CREATE - - DELETE - resources: - - bfdprofiles - sideEffects: None -- 
admissionReviewVersions: - - v1 - clientConfig: - service: - name: metallb-webhook-service - namespace: metallb-system - path: /validate-metallb-io-v1beta1-l2advertisement - failurePolicy: Fail - name: l2advertisementvalidationwebhook.metallb.io - rules: - - apiGroups: - - metallb.io - apiVersions: - - v1beta1 - operations: - - CREATE - - UPDATE - resources: - - l2advertisements - sideEffects: None diff --git a/infrastructure/metallb/patches/node-placement.yaml b/infrastructure/metallb/patches/node-placement.yaml deleted file mode 100644 index c42ae99..0000000 --- a/infrastructure/metallb/patches/node-placement.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# infrastructure/metallb/patches/node-placement.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: metallb-controller - namespace: metallb-system -spec: - template: - spec: - containers: - - name: controller - args: - - --port=7472 - - --log-level=info - - --webhook-mode=enabled - - --tls-min-version=VersionTLS12 - - --lb-class=metallb - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: - - rpi4 - - rpi5 diff --git a/infrastructure/metallb/patches/speaker-loglevel.yaml b/infrastructure/metallb/patches/speaker-loglevel.yaml deleted file mode 100644 index 61b8942..0000000 --- a/infrastructure/metallb/patches/speaker-loglevel.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# infrastructure/metallb/patches/speaker-loglevel.yaml -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: metallb-speaker - namespace: metallb-system -spec: - template: - spec: - containers: - - name: speaker - args: - - --port=7472 - - --log-level=info - - --lb-class=metallb diff --git a/infrastructure/modules/base/storageclass/asteria-encrypted.yaml b/infrastructure/modules/base/storageclass/asteria-encrypted.yaml new file mode 100644 index 0000000..a6eb566 --- /dev/null +++ 
b/infrastructure/modules/base/storageclass/asteria-encrypted.yaml @@ -0,0 +1,24 @@ +# infrastructure/modules/base/storageclass/asteria-encrypted.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: asteria-encrypted +parameters: + diskSelector: asteria + fromBackup: "" + numberOfReplicas: "2" + staleReplicaTimeout: "30" + fsType: "ext4" + replicaAutoBalance: "least-effort" + dataLocality: "disabled" + encrypted: "true" + csi.storage.k8s.io/provisioner-secret-name: ${pvc.name} + csi.storage.k8s.io/provisioner-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-publish-secret-name: ${pvc.name} + csi.storage.k8s.io/node-publish-secret-namespace: ${pvc.namespace} + csi.storage.k8s.io/node-stage-secret-name: ${pvc.name} + csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace} +provisioner: driver.longhorn.io +reclaimPolicy: Retain +allowVolumeExpansion: true +volumeBindingMode: Immediate diff --git a/infrastructure/modules/base/storageclass/kustomization.yaml b/infrastructure/modules/base/storageclass/kustomization.yaml index 704dd73..44d79c7 100644 --- a/infrastructure/modules/base/storageclass/kustomization.yaml +++ b/infrastructure/modules/base/storageclass/kustomization.yaml @@ -3,4 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - asteria.yaml + - asteria-encrypted.yaml - astreae.yaml diff --git a/infrastructure/postgres/secretproviderclass.yaml b/infrastructure/postgres/secretproviderclass.yaml index b9317a1..3a65075 100644 --- a/infrastructure/postgres/secretproviderclass.yaml +++ b/infrastructure/postgres/secretproviderclass.yaml @@ -11,5 +11,5 @@ spec: roleName: "postgres" objects: | - objectName: "postgres_password" - secretPath: "kv/data/postgres" + secretPath: "kv/data/atlas/postgres/postgres-db" secretKey: "POSTGRES_PASSWORD" diff --git a/infrastructure/sources/cert-manager/letsencrypt-prod.yaml b/infrastructure/sources/cert-manager/letsencrypt-prod.yaml index 65bf316..7f90f01 
100644 --- a/infrastructure/sources/cert-manager/letsencrypt-prod.yaml +++ b/infrastructure/sources/cert-manager/letsencrypt-prod.yaml @@ -1,3 +1,4 @@ +# infrastructure/sources/cert-manager/letsencrypt-prod.yaml apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: diff --git a/infrastructure/sources/cert-manager/letsencrypt.yaml b/infrastructure/sources/cert-manager/letsencrypt.yaml index 13590f3..a988312 100644 --- a/infrastructure/sources/cert-manager/letsencrypt.yaml +++ b/infrastructure/sources/cert-manager/letsencrypt.yaml @@ -1,3 +1,4 @@ +# infrastructure/sources/cert-manager/letsencrypt.yaml apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: diff --git a/infrastructure/sources/helm/ananace.yaml b/infrastructure/sources/helm/ananace.yaml new file mode 100644 index 0000000..b5e8a7b --- /dev/null +++ b/infrastructure/sources/helm/ananace.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/ananace.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: ananace + namespace: flux-system +spec: + interval: 1h + url: https://ananace.gitlab.io/charts diff --git a/infrastructure/sources/helm/kustomization.yaml b/infrastructure/sources/helm/kustomization.yaml index c8d20bb..e45f58f 100644 --- a/infrastructure/sources/helm/kustomization.yaml +++ b/infrastructure/sources/helm/kustomization.yaml @@ -2,15 +2,18 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: + - ananace.yaml - fluent-bit.yaml - grafana.yaml - hashicorp.yaml - jetstack.yaml - jenkins.yaml - mailu.yaml + - metallb.yaml - opentelemetry.yaml - opensearch.yaml - harbor.yaml + - longhorn.yaml - prometheus.yaml - victoria-metrics.yaml - secrets-store-csi.yaml diff --git a/infrastructure/sources/helm/longhorn.yaml b/infrastructure/sources/helm/longhorn.yaml new file mode 100644 index 0000000..3a2d728 --- /dev/null +++ b/infrastructure/sources/helm/longhorn.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/longhorn.yaml +apiVersion: 
source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: longhorn + namespace: flux-system +spec: + interval: 30m + url: https://charts.longhorn.io diff --git a/infrastructure/sources/helm/metallb.yaml b/infrastructure/sources/helm/metallb.yaml new file mode 100644 index 0000000..12021af --- /dev/null +++ b/infrastructure/sources/helm/metallb.yaml @@ -0,0 +1,9 @@ +# infrastructure/sources/helm/metallb.yaml +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: metallb + namespace: flux-system +spec: + interval: 1h + url: https://metallb.github.io/metallb diff --git a/infrastructure/traefik/crds.yaml b/infrastructure/traefik/crds.yaml new file mode 100644 index 0000000..21f26f9 --- /dev/null +++ b/infrastructure/traefik/crds.yaml @@ -0,0 +1,3119 @@ +# infrastructure/traefik/crds.yaml +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: ingressroutes.traefik.io +spec: + group: traefik.io + names: + kind: IngressRoute + listKind: IngressRouteList + plural: ingressroutes + singular: ingressroute + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: IngressRoute is the CRD implementation of a Traefik HTTP Router. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: IngressRouteSpec defines the desired state of IngressRoute. + properties: + entryPoints: + description: |- + EntryPoints defines the list of entry point names to bind to. + Entry points have to be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/entrypoints/ + Default: all. + items: + type: string + type: array + routes: + description: Routes defines the list of routes. + items: + description: Route holds the HTTP route configuration. + properties: + kind: + description: |- + Kind defines the kind of the route. + Rule is the only supported kind. + If not defined, defaults to Rule. + enum: + - Rule + type: string + match: + description: |- + Match defines the router's rule. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#rule + type: string + middlewares: + description: |- + Middlewares defines the list of references to Middleware resources. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-middleware + items: + description: MiddlewareRef is a reference to a Middleware + resource. + properties: + name: + description: Name defines the name of the referenced Middleware + resource. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Middleware resource. + type: string + required: + - name + type: object + type: array + observability: + description: |- + Observability defines the observability configuration for a router. + More info: https://doc.traefik.io/traefik/v3.2/routing/routers/#observability + properties: + accessLogs: + type: boolean + metrics: + type: boolean + tracing: + type: boolean + type: object + priority: + description: |- + Priority defines the router's priority. 
+ More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#priority + type: integer + services: + description: |- + Services defines the list of Service. + It can contain any combination of TraefikService and/or reference to a Kubernetes Service. + items: + description: Service defines an upstream HTTP service to proxy + traffic to. + properties: + healthCheck: + description: Healthcheck defines health checks for ExternalName + services. + properties: + followRedirects: + description: |- + FollowRedirects defines whether redirects should be followed during the health check calls. + Default: true + type: boolean + headers: + additionalProperties: + type: string + description: Headers defines custom headers to be + sent to the health check endpoint. + type: object + hostname: + description: Hostname defines the value of hostname + in the Host header of the health check request. + type: string + interval: + anyOf: + - type: integer + - type: string + description: |- + Interval defines the frequency of the health check calls. + Default: 30s + x-kubernetes-int-or-string: true + method: + description: Method defines the healthcheck method. + type: string + mode: + description: |- + Mode defines the health check mode. + If defined to grpc, will use the gRPC health check protocol to probe the server. + Default: http + type: string + path: + description: Path defines the server URL path for + the health check endpoint. + type: string + port: + description: Port defines the server URL port for + the health check endpoint. + type: integer + scheme: + description: Scheme replaces the server URL scheme + for the health check endpoint. + type: string + status: + description: Status defines the expected HTTP status + code of the response to the health check request. 
+ type: integer + timeout: + anyOf: + - type: integer + - type: string + description: |- + Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy. + Default: 5s + x-kubernetes-int-or-string: true + type: object + kind: + description: Kind defines the kind of the Service. + enum: + - Service + - TraefikService + type: string + name: + description: |- + Name defines the name of the referenced Kubernetes Service or TraefikService. + The differentiation between the two is specified in the Kind field. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service or TraefikService. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. + type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + passHostHeader: + description: |- + PassHostHeader defines whether the client Host header is forwarded to the upstream Kubernetes Service. + By default, passHostHeader is true. + type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + responseForwarding: + description: ResponseForwarding defines how Traefik forwards + the response from the upstream Kubernetes Service to + the client. 
+ properties: + flushInterval: + description: |- + FlushInterval defines the interval, in milliseconds, in between flushes to the client while copying the response body. + A negative value means to flush immediately after each write to the client. + This configuration is ignored when ReverseProxy recognizes a response as a streaming response; + for such responses, writes are flushed to the client immediately. + Default: 100ms + type: string + type: object + scheme: + description: |- + Scheme defines the scheme to use for the request to the upstream Kubernetes Service. + It defaults to https when Kubernetes Service port is 443, http otherwise. + type: string + serversTransport: + description: |- + ServersTransport defines the name of ServersTransport resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. + type: string + sticky: + description: |- + Sticky defines the sticky sessions configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#sticky-sessions + properties: + cookie: + description: Cookie defines the sticky cookie configuration. + properties: + httpOnly: + description: HTTPOnly defines whether the cookie + can be accessed by client-side APIs, such as + JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. + When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. + type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. 
+ More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie + can only be transmitted over an encrypted connection + (i.e. HTTPS). + type: boolean + type: object + type: object + strategy: + description: |- + Strategy defines the load balancing strategy between the servers. + RoundRobin is the only supported value at the moment. + type: string + weight: + description: |- + Weight defines the weight and should only be specified when Name references a TraefikService object + (and to be precise, one that embeds a Weighted Round Robin). + type: integer + required: + - name + type: object + type: array + syntax: + description: |- + Syntax defines the router's rule syntax. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#rulesyntax + type: string + required: + - match + type: object + type: array + tls: + description: |- + TLS defines the TLS configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#tls + properties: + certResolver: + description: |- + CertResolver defines the name of the certificate resolver to use. + Cert resolvers have to be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/https/acme/#certificate-resolvers + type: string + domains: + description: |- + Domains defines the list of domains that will be used to issue certificates. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#domains + items: + description: Domain holds a domain name with SANs. + properties: + main: + description: Main defines the main domain name. + type: string + sans: + description: SANs defines the subject alternative domain + names. + items: + type: string + type: array + type: object + type: array + options: + description: |- + Options defines the reference to a TLSOption, that specifies the parameters of the TLS connection. + If not defined, the `default` TLSOption is used. 
+ More info: https://doc.traefik.io/traefik/v3.3/https/tls/#tls-options + properties: + name: + description: |- + Name defines the name of the referenced TLSOption. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-tlsoption + type: string + namespace: + description: |- + Namespace defines the namespace of the referenced TLSOption. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-tlsoption + type: string + required: + - name + type: object + secretName: + description: SecretName is the name of the referenced Kubernetes + Secret to specify the certificate details. + type: string + store: + description: |- + Store defines the reference to the TLSStore, that will be used to store certificates. + Please note that only `default` TLSStore can be used. + properties: + name: + description: |- + Name defines the name of the referenced TLSStore. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-tlsstore + type: string + namespace: + description: |- + Namespace defines the namespace of the referenced TLSStore. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-tlsstore + type: string + required: + - name + type: object + type: object + required: + - routes + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: ingressroutetcps.traefik.io +spec: + group: traefik.io + names: + kind: IngressRouteTCP + listKind: IngressRouteTCPList + plural: ingressroutetcps + singular: ingressroutetcp + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: IngressRouteTCP is the CRD implementation of a Traefik TCP Router. 
+ properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: IngressRouteTCPSpec defines the desired state of IngressRouteTCP. + properties: + entryPoints: + description: |- + EntryPoints defines the list of entry point names to bind to. + Entry points have to be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/entrypoints/ + Default: all. + items: + type: string + type: array + routes: + description: Routes defines the list of routes. + items: + description: RouteTCP holds the TCP route configuration. + properties: + match: + description: |- + Match defines the router's rule. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#rule_1 + type: string + middlewares: + description: Middlewares defines the list of references to MiddlewareTCP + resources. + items: + description: ObjectReference is a generic reference to a Traefik + resource. + properties: + name: + description: Name defines the name of the referenced Traefik + resource. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Traefik resource. + type: string + required: + - name + type: object + type: array + priority: + description: |- + Priority defines the router's priority. 
+ More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#priority_1 + type: integer + services: + description: Services defines the list of TCP services. + items: + description: ServiceTCP defines an upstream TCP service to + proxy traffic to. + properties: + name: + description: Name defines the name of the referenced Kubernetes + Service. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. + type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + proxyProtocol: + description: |- + ProxyProtocol defines the PROXY protocol configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#proxy-protocol + properties: + version: + description: Version defines the PROXY Protocol version + to use. + type: integer + type: object + serversTransport: + description: |- + ServersTransport defines the name of ServersTransportTCP resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. 
+ type: string + terminationDelay: + description: |- + TerminationDelay defines the deadline that the proxy sets, after one of its connected peers indicates + it has closed the writing capability of its connection, to close the reading capability as well, + hence fully terminating the connection. + It is a duration in milliseconds, defaulting to 100. + A negative value means an infinite deadline (i.e. the reading capability is never closed). + Deprecated: TerminationDelay will not be supported in future APIVersions, please use ServersTransport to configure the TerminationDelay instead. + type: integer + tls: + description: TLS determines whether to use TLS when dialing + with the backend. + type: boolean + weight: + description: Weight defines the weight used when balancing + requests between multiple Kubernetes Service. + type: integer + required: + - name + - port + type: object + type: array + syntax: + description: |- + Syntax defines the router's rule syntax. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#rulesyntax_1 + type: string + required: + - match + type: object + type: array + tls: + description: |- + TLS defines the TLS configuration on a layer 4 / TCP Route. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#tls_1 + properties: + certResolver: + description: |- + CertResolver defines the name of the certificate resolver to use. + Cert resolvers have to be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/https/acme/#certificate-resolvers + type: string + domains: + description: |- + Domains defines the list of domains that will be used to issue certificates. + More info: https://doc.traefik.io/traefik/v3.3/routing/routers/#domains + items: + description: Domain holds a domain name with SANs. + properties: + main: + description: Main defines the main domain name. + type: string + sans: + description: SANs defines the subject alternative domain + names. 
+ items: + type: string + type: array + type: object + type: array + options: + description: |- + Options defines the reference to a TLSOption, that specifies the parameters of the TLS connection. + If not defined, the `default` TLSOption is used. + More info: https://doc.traefik.io/traefik/v3.3/https/tls/#tls-options + properties: + name: + description: Name defines the name of the referenced Traefik + resource. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Traefik resource. + type: string + required: + - name + type: object + passthrough: + description: Passthrough defines whether a TLS router will terminate + the TLS connection. + type: boolean + secretName: + description: SecretName is the name of the referenced Kubernetes + Secret to specify the certificate details. + type: string + store: + description: |- + Store defines the reference to the TLSStore, that will be used to store certificates. + Please note that only `default` TLSStore can be used. + properties: + name: + description: Name defines the name of the referenced Traefik + resource. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Traefik resource. + type: string + required: + - name + type: object + type: object + required: + - routes + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: ingressrouteudps.traefik.io +spec: + group: traefik.io + names: + kind: IngressRouteUDP + listKind: IngressRouteUDPList + plural: ingressrouteudps + singular: ingressrouteudp + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: IngressRouteUDP is a CRD implementation of a Traefik UDP Router. 
+ properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: IngressRouteUDPSpec defines the desired state of a IngressRouteUDP. + properties: + entryPoints: + description: |- + EntryPoints defines the list of entry point names to bind to. + Entry points have to be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/entrypoints/ + Default: all. + items: + type: string + type: array + routes: + description: Routes defines the list of routes. + items: + description: RouteUDP holds the UDP route configuration. + properties: + services: + description: Services defines the list of UDP services. + items: + description: ServiceUDP defines an upstream UDP service to + proxy traffic to. + properties: + name: + description: Name defines the name of the referenced Kubernetes + Service. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. 
+ type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + weight: + description: Weight defines the weight used when balancing + requests between multiple Kubernetes Service. + type: integer + required: + - name + - port + type: object + type: array + type: object + type: array + required: + - routes + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: middlewares.traefik.io +spec: + group: traefik.io + names: + kind: Middleware + listKind: MiddlewareList + plural: middlewares + singular: middleware + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + Middleware is the CRD implementation of a Traefik Middleware. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/overview/ + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. 
+ Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MiddlewareSpec defines the desired state of a Middleware. + properties: + addPrefix: + description: |- + AddPrefix holds the add prefix middleware configuration. + This middleware updates the path of a request before forwarding it. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/addprefix/ + properties: + prefix: + description: |- + Prefix is the string to add before the current path in the requested URL. + It should include a leading slash (/). + type: string + type: object + basicAuth: + description: |- + BasicAuth holds the basic auth middleware configuration. + This middleware restricts access to your services to known users. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/basicauth/ + properties: + headerField: + description: |- + HeaderField defines a header field to store the authenticated user. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/basicauth/#headerfield + type: string + realm: + description: |- + Realm allows the protected resources on a server to be partitioned into a set of protection spaces, each with its own authentication scheme. + Default: traefik. + type: string + removeHeader: + description: |- + RemoveHeader sets the removeHeader option to true to remove the authorization header before forwarding the request to your service. + Default: false. + type: boolean + secret: + description: Secret is the name of the referenced Kubernetes Secret + containing user credentials. + type: string + type: object + buffering: + description: |- + Buffering holds the buffering middleware configuration. + This middleware retries or limits the size of requests that can be forwarded to backends. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/buffering/#maxrequestbodybytes + properties: + maxRequestBodyBytes: + description: |- + MaxRequestBodyBytes defines the maximum allowed body size for the request (in bytes). + If the request exceeds the allowed size, it is not forwarded to the service, and the client gets a 413 (Request Entity Too Large) response. + Default: 0 (no maximum). + format: int64 + type: integer + maxResponseBodyBytes: + description: |- + MaxResponseBodyBytes defines the maximum allowed response size from the service (in bytes). + If the response exceeds the allowed size, it is not forwarded to the client. The client gets a 500 (Internal Server Error) response instead. + Default: 0 (no maximum). + format: int64 + type: integer + memRequestBodyBytes: + description: |- + MemRequestBodyBytes defines the threshold (in bytes) from which the request will be buffered on disk instead of in memory. + Default: 1048576 (1Mi). + format: int64 + type: integer + memResponseBodyBytes: + description: |- + MemResponseBodyBytes defines the threshold (in bytes) from which the response will be buffered on disk instead of in memory. + Default: 1048576 (1Mi). + format: int64 + type: integer + retryExpression: + description: |- + RetryExpression defines the retry conditions. + It is a logical combination of functions with operators AND (&&) and OR (||). + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/buffering/#retryexpression + type: string + type: object + chain: + description: |- + Chain holds the configuration of the chain middleware. + This middleware enables to define reusable combinations of other pieces of middleware. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/chain/ + properties: + middlewares: + description: Middlewares is the list of MiddlewareRef which composes + the chain. + items: + description: MiddlewareRef is a reference to a Middleware resource. 
+ properties: + name: + description: Name defines the name of the referenced Middleware + resource. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Middleware resource. + type: string + required: + - name + type: object + type: array + type: object + circuitBreaker: + description: CircuitBreaker holds the circuit breaker configuration. + properties: + checkPeriod: + anyOf: + - type: integer + - type: string + description: CheckPeriod is the interval between successive checks + of the circuit breaker condition (when in standby state). + x-kubernetes-int-or-string: true + expression: + description: Expression is the condition that triggers the tripped + state. + type: string + fallbackDuration: + anyOf: + - type: integer + - type: string + description: FallbackDuration is the duration for which the circuit + breaker will wait before trying to recover (from a tripped state). + x-kubernetes-int-or-string: true + recoveryDuration: + anyOf: + - type: integer + - type: string + description: RecoveryDuration is the duration for which the circuit + breaker will try to recover (as soon as it is in recovering + state). + x-kubernetes-int-or-string: true + responseCode: + description: ResponseCode is the status code that the circuit + breaker will return while it is in the open state. + type: integer + type: object + compress: + description: |- + Compress holds the compress middleware configuration. + This middleware compresses responses before sending them to the client, using gzip, brotli, or zstd compression. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/compress/ + properties: + defaultEncoding: + description: DefaultEncoding specifies the default encoding if + the `Accept-Encoding` header is not in the request or contains + a wildcard (`*`). + type: string + encodings: + description: Encodings defines the list of supported compression + algorithms. 
+ items: + type: string + type: array + excludedContentTypes: + description: |- + ExcludedContentTypes defines the list of content types to compare the Content-Type header of the incoming requests and responses before compressing. + `application/grpc` is always excluded. + items: + type: string + type: array + includedContentTypes: + description: IncludedContentTypes defines the list of content + types to compare the Content-Type header of the responses before + compressing. + items: + type: string + type: array + minResponseBodyBytes: + description: |- + MinResponseBodyBytes defines the minimum amount of bytes a response body must have to be compressed. + Default: 1024. + type: integer + type: object + contentType: + description: |- + ContentType holds the content-type middleware configuration. + This middleware exists to enable the correct behavior until at least the default one can be changed in a future version. + properties: + autoDetect: + description: |- + AutoDetect specifies whether to let the `Content-Type` header, if it has not been set by the backend, + be automatically set to a value derived from the contents of the response. + Deprecated: AutoDetect option is deprecated, Content-Type middleware is only meant to be used to enable the content-type detection, please remove any usage of this option. + type: boolean + type: object + digestAuth: + description: |- + DigestAuth holds the digest auth middleware configuration. + This middleware restricts access to your services to known users. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/digestauth/ + properties: + headerField: + description: |- + HeaderField defines a header field to store the authenticated user. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/basicauth/#headerfield + type: string + realm: + description: |- + Realm allows the protected resources on a server to be partitioned into a set of protection spaces, each with its own authentication scheme. 
+ Default: traefik. + type: string + removeHeader: + description: RemoveHeader defines whether to remove the authorization + header before forwarding the request to the backend. + type: boolean + secret: + description: Secret is the name of the referenced Kubernetes Secret + containing user credentials. + type: string + type: object + errors: + description: |- + ErrorPage holds the custom error middleware configuration. + This middleware returns a custom page in lieu of the default, according to configured ranges of HTTP Status codes. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/errorpages/ + properties: + query: + description: |- + Query defines the URL for the error page (hosted by service). + The {status} variable can be used in order to insert the status code in the URL. + type: string + service: + description: |- + Service defines the reference to a Kubernetes Service that will serve the error page. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/errorpages/#service + properties: + healthCheck: + description: Healthcheck defines health checks for ExternalName + services. + properties: + followRedirects: + description: |- + FollowRedirects defines whether redirects should be followed during the health check calls. + Default: true + type: boolean + headers: + additionalProperties: + type: string + description: Headers defines custom headers to be sent + to the health check endpoint. + type: object + hostname: + description: Hostname defines the value of hostname in + the Host header of the health check request. + type: string + interval: + anyOf: + - type: integer + - type: string + description: |- + Interval defines the frequency of the health check calls. + Default: 30s + x-kubernetes-int-or-string: true + method: + description: Method defines the healthcheck method. + type: string + mode: + description: |- + Mode defines the health check mode. 
+ If defined to grpc, will use the gRPC health check protocol to probe the server. + Default: http + type: string + path: + description: Path defines the server URL path for the + health check endpoint. + type: string + port: + description: Port defines the server URL port for the + health check endpoint. + type: integer + scheme: + description: Scheme replaces the server URL scheme for + the health check endpoint. + type: string + status: + description: Status defines the expected HTTP status code + of the response to the health check request. + type: integer + timeout: + anyOf: + - type: integer + - type: string + description: |- + Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy. + Default: 5s + x-kubernetes-int-or-string: true + type: object + kind: + description: Kind defines the kind of the Service. + enum: + - Service + - TraefikService + type: string + name: + description: |- + Name defines the name of the referenced Kubernetes Service or TraefikService. + The differentiation between the two is specified in the Kind field. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service or TraefikService. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. + type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. 
+ type: boolean + passHostHeader: + description: |- + PassHostHeader defines whether the client Host header is forwarded to the upstream Kubernetes Service. + By default, passHostHeader is true. + type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + responseForwarding: + description: ResponseForwarding defines how Traefik forwards + the response from the upstream Kubernetes Service to the + client. + properties: + flushInterval: + description: |- + FlushInterval defines the interval, in milliseconds, in between flushes to the client while copying the response body. + A negative value means to flush immediately after each write to the client. + This configuration is ignored when ReverseProxy recognizes a response as a streaming response; + for such responses, writes are flushed to the client immediately. + Default: 100ms + type: string + type: object + scheme: + description: |- + Scheme defines the scheme to use for the request to the upstream Kubernetes Service. + It defaults to https when Kubernetes Service port is 443, http otherwise. + type: string + serversTransport: + description: |- + ServersTransport defines the name of ServersTransport resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. + type: string + sticky: + description: |- + Sticky defines the sticky sessions configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#sticky-sessions + properties: + cookie: + description: Cookie defines the sticky cookie configuration. + properties: + httpOnly: + description: HTTPOnly defines whether the cookie can + be accessed by client-side APIs, such as JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. 
+ When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. + type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie can + only be transmitted over an encrypted connection + (i.e. HTTPS). + type: boolean + type: object + type: object + strategy: + description: |- + Strategy defines the load balancing strategy between the servers. + RoundRobin is the only supported value at the moment. + type: string + weight: + description: |- + Weight defines the weight and should only be specified when Name references a TraefikService object + (and to be precise, one that embeds a Weighted Round Robin). + type: integer + required: + - name + type: object + status: + description: |- + Status defines which status or range of statuses should result in an error page. + It can be either a status code as a number (500), + as multiple comma-separated numbers (500,502), + as ranges by separating two codes with a dash (500-599), + or a combination of the two (404,418,500-599). + items: + type: string + type: array + type: object + forwardAuth: + description: |- + ForwardAuth holds the forward auth middleware configuration. + This middleware delegates the request authentication to a Service. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/forwardauth/ + properties: + addAuthCookiesToResponse: + description: AddAuthCookiesToResponse defines the list of cookies + to copy from the authentication server response to the response. + items: + type: string + type: array + address: + description: Address defines the authentication server address. + type: string + authRequestHeaders: + description: |- + AuthRequestHeaders defines the list of the headers to copy from the request to the authentication server. + If not set or empty then all request headers are passed. + items: + type: string + type: array + authResponseHeaders: + description: AuthResponseHeaders defines the list of headers to + copy from the authentication server response and set on forwarded + request, replacing any existing conflicting headers. + items: + type: string + type: array + authResponseHeadersRegex: + description: |- + AuthResponseHeadersRegex defines the regex to match headers to copy from the authentication server response and set on forwarded request, after stripping all headers that match the regex. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/forwardauth/#authresponseheadersregex + type: string + forwardBody: + description: ForwardBody defines whether to send the request body + to the authentication server. + type: boolean + headerField: + description: |- + HeaderField defines a header field to store the authenticated user. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/forwardauth/#headerfield + type: string + maxBodySize: + description: MaxBodySize defines the maximum body size in bytes + allowed to be forwarded to the authentication server. + format: int64 + type: integer + preserveLocationHeader: + description: PreserveLocationHeader defines whether to forward + the Location header to the client as is or prefix it with the + domain name of the authentication server. 
+ type: boolean + tls: + description: TLS defines the configuration used to secure the + connection to the authentication server. + properties: + caOptional: + description: 'Deprecated: TLS client authentication is a server + side option (see https://github.com/golang/go/blob/740a490f71d026bb7d2d13cb8fa2d6d6e0572b70/src/crypto/tls/common.go#L634).' + type: boolean + caSecret: + description: |- + CASecret is the name of the referenced Kubernetes Secret containing the CA to validate the server certificate. + The CA certificate is extracted from key `tls.ca` or `ca.crt`. + type: string + certSecret: + description: |- + CertSecret is the name of the referenced Kubernetes Secret containing the client certificate. + The client certificate is extracted from the keys `tls.crt` and `tls.key`. + type: string + insecureSkipVerify: + description: InsecureSkipVerify defines whether the server + certificates should be validated. + type: boolean + type: object + trustForwardHeader: + description: 'TrustForwardHeader defines whether to trust (ie: + forward) all X-Forwarded-* headers.' + type: boolean + type: object + grpcWeb: + description: |- + GrpcWeb holds the gRPC web middleware configuration. + This middleware converts a gRPC web request to an HTTP/2 gRPC request. + properties: + allowOrigins: + description: |- + AllowOrigins is a list of allowable origins. + Can also be a wildcard origin "*". + items: + type: string + type: array + type: object + headers: + description: |- + Headers holds the headers middleware configuration. + This middleware manages the requests and responses headers. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/headers/#customrequestheaders + properties: + accessControlAllowCredentials: + description: AccessControlAllowCredentials defines whether the + request can include user credentials. 
+ type: boolean + accessControlAllowHeaders: + description: AccessControlAllowHeaders defines the Access-Control-Request-Headers + values sent in preflight response. + items: + type: string + type: array + accessControlAllowMethods: + description: AccessControlAllowMethods defines the Access-Control-Request-Method + values sent in preflight response. + items: + type: string + type: array + accessControlAllowOriginList: + description: AccessControlAllowOriginList is a list of allowable + origins. Can also be a wildcard origin "*". + items: + type: string + type: array + accessControlAllowOriginListRegex: + description: AccessControlAllowOriginListRegex is a list of allowable + origins written following the Regular Expression syntax (https://golang.org/pkg/regexp/). + items: + type: string + type: array + accessControlExposeHeaders: + description: AccessControlExposeHeaders defines the Access-Control-Expose-Headers + values sent in preflight response. + items: + type: string + type: array + accessControlMaxAge: + description: AccessControlMaxAge defines the time that a preflight + request may be cached. + format: int64 + type: integer + addVaryHeader: + description: AddVaryHeader defines whether the Vary header is + automatically added/updated when the AccessControlAllowOriginList + is set. + type: boolean + allowedHosts: + description: AllowedHosts defines the fully qualified list of + allowed domain names. + items: + type: string + type: array + browserXssFilter: + description: BrowserXSSFilter defines whether to add the X-XSS-Protection + header with the value 1; mode=block. + type: boolean + contentSecurityPolicy: + description: ContentSecurityPolicy defines the Content-Security-Policy + header value. + type: string + contentSecurityPolicyReportOnly: + description: ContentSecurityPolicyReportOnly defines the Content-Security-Policy-Report-Only + header value. 
+ type: string + contentTypeNosniff: + description: ContentTypeNosniff defines whether to add the X-Content-Type-Options + header with the nosniff value. + type: boolean + customBrowserXSSValue: + description: |- + CustomBrowserXSSValue defines the X-XSS-Protection header value. + This overrides the BrowserXssFilter option. + type: string + customFrameOptionsValue: + description: |- + CustomFrameOptionsValue defines the X-Frame-Options header value. + This overrides the FrameDeny option. + type: string + customRequestHeaders: + additionalProperties: + type: string + description: CustomRequestHeaders defines the header names and + values to apply to the request. + type: object + customResponseHeaders: + additionalProperties: + type: string + description: CustomResponseHeaders defines the header names and + values to apply to the response. + type: object + featurePolicy: + description: 'Deprecated: FeaturePolicy option is deprecated, + please use PermissionsPolicy instead.' + type: string + forceSTSHeader: + description: ForceSTSHeader defines whether to add the STS header + even when the connection is HTTP. + type: boolean + frameDeny: + description: FrameDeny defines whether to add the X-Frame-Options + header with the DENY value. + type: boolean + hostsProxyHeaders: + description: HostsProxyHeaders defines the header keys that may + hold a proxied hostname value for the request. + items: + type: string + type: array + isDevelopment: + description: |- + IsDevelopment defines whether to mitigate the unwanted effects of the AllowedHosts, SSL, and STS options when developing. + Usually testing takes place using HTTP, not HTTPS, and on localhost, not your production domain. + If you would like your development environment to mimic production with complete Host blocking, SSL redirects, + and STS headers, leave this as false. + type: boolean + permissionsPolicy: + description: |- + PermissionsPolicy defines the Permissions-Policy header value. 
+ This allows sites to control browser features. + type: string + publicKey: + description: PublicKey is the public key that implements HPKP + to prevent MITM attacks with forged certificates. + type: string + referrerPolicy: + description: |- + ReferrerPolicy defines the Referrer-Policy header value. + This allows sites to control whether browsers forward the Referer header to other sites. + type: string + sslForceHost: + description: 'Deprecated: SSLForceHost option is deprecated, please + use RedirectRegex instead.' + type: boolean + sslHost: + description: 'Deprecated: SSLHost option is deprecated, please + use RedirectRegex instead.' + type: string + sslProxyHeaders: + additionalProperties: + type: string + description: |- + SSLProxyHeaders defines the header keys with associated values that would indicate a valid HTTPS request. + It can be useful when using other proxies (example: "X-Forwarded-Proto": "https"). + type: object + sslRedirect: + description: 'Deprecated: SSLRedirect option is deprecated, please + use EntryPoint redirection or RedirectScheme instead.' + type: boolean + sslTemporaryRedirect: + description: 'Deprecated: SSLTemporaryRedirect option is deprecated, + please use EntryPoint redirection or RedirectScheme instead.' + type: boolean + stsIncludeSubdomains: + description: STSIncludeSubdomains defines whether the includeSubDomains + directive is appended to the Strict-Transport-Security header. + type: boolean + stsPreload: + description: STSPreload defines whether the preload flag is appended + to the Strict-Transport-Security header. + type: boolean + stsSeconds: + description: |- + STSSeconds defines the max-age of the Strict-Transport-Security header. + If set to 0, the header is not set. + format: int64 + type: integer + type: object + inFlightReq: + description: |- + InFlightReq holds the in-flight request middleware configuration. + This middleware limits the number of requests being processed and served concurrently. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/inflightreq/ + properties: + amount: + description: |- + Amount defines the maximum amount of allowed simultaneous in-flight request. + The middleware responds with HTTP 429 Too Many Requests if there are already amount requests in progress (based on the same sourceCriterion strategy). + format: int64 + type: integer + sourceCriterion: + description: |- + SourceCriterion defines what criterion is used to group requests as originating from a common source. + If several strategies are defined at the same time, an error will be raised. + If none are set, the default is to use the requestHost. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/inflightreq/#sourcecriterion + properties: + ipStrategy: + description: |- + IPStrategy holds the IP strategy configuration used by Traefik to determine the client IP. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ipallowlist/#ipstrategy + properties: + depth: + description: Depth tells Traefik to use the X-Forwarded-For + header and take the IP located at the depth position + (starting from the right). + type: integer + excludedIPs: + description: ExcludedIPs configures Traefik to scan the + X-Forwarded-For header and select the first IP not in + the list. + items: + type: string + type: array + ipv6Subnet: + description: IPv6Subnet configures Traefik to consider + all IPv6 addresses from the defined subnet as originating + from the same IP. Applies to RemoteAddrStrategy and + DepthStrategy. + type: integer + type: object + requestHeaderName: + description: RequestHeaderName defines the name of the header + used to group incoming requests. + type: string + requestHost: + description: RequestHost defines whether to consider the request + Host as the source. + type: boolean + type: object + type: object + ipAllowList: + description: |- + IPAllowList holds the IP allowlist middleware configuration. 
+ This middleware limits allowed requests based on the client IP. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ipallowlist/ + properties: + ipStrategy: + description: |- + IPStrategy holds the IP strategy configuration used by Traefik to determine the client IP. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ipallowlist/#ipstrategy + properties: + depth: + description: Depth tells Traefik to use the X-Forwarded-For + header and take the IP located at the depth position (starting + from the right). + type: integer + excludedIPs: + description: ExcludedIPs configures Traefik to scan the X-Forwarded-For + header and select the first IP not in the list. + items: + type: string + type: array + ipv6Subnet: + description: IPv6Subnet configures Traefik to consider all + IPv6 addresses from the defined subnet as originating from + the same IP. Applies to RemoteAddrStrategy and DepthStrategy. + type: integer + type: object + rejectStatusCode: + description: |- + RejectStatusCode defines the HTTP status code used for refused requests. + If not set, the default is 403 (Forbidden). + type: integer + sourceRange: + description: SourceRange defines the set of allowed IPs (or ranges + of allowed IPs by using CIDR notation). + items: + type: string + type: array + type: object + ipWhiteList: + description: 'Deprecated: please use IPAllowList instead.' + properties: + ipStrategy: + description: |- + IPStrategy holds the IP strategy configuration used by Traefik to determine the client IP. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ipallowlist/#ipstrategy + properties: + depth: + description: Depth tells Traefik to use the X-Forwarded-For + header and take the IP located at the depth position (starting + from the right). + type: integer + excludedIPs: + description: ExcludedIPs configures Traefik to scan the X-Forwarded-For + header and select the first IP not in the list. 
+ items: + type: string + type: array + ipv6Subnet: + description: IPv6Subnet configures Traefik to consider all + IPv6 addresses from the defined subnet as originating from + the same IP. Applies to RemoteAddrStrategy and DepthStrategy. + type: integer + type: object + sourceRange: + description: SourceRange defines the set of allowed IPs (or ranges + of allowed IPs by using CIDR notation). Required. + items: + type: string + type: array + type: object + passTLSClientCert: + description: |- + PassTLSClientCert holds the pass TLS client cert middleware configuration. + This middleware adds the selected data from the passed client TLS certificate to a header. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/passtlsclientcert/ + properties: + info: + description: Info selects the specific client certificate details + you want to add to the X-Forwarded-Tls-Client-Cert-Info header. + properties: + issuer: + description: Issuer defines the client certificate issuer + details to add to the X-Forwarded-Tls-Client-Cert-Info header. + properties: + commonName: + description: CommonName defines whether to add the organizationalUnit + information into the issuer. + type: boolean + country: + description: Country defines whether to add the country + information into the issuer. + type: boolean + domainComponent: + description: DomainComponent defines whether to add the + domainComponent information into the issuer. + type: boolean + locality: + description: Locality defines whether to add the locality + information into the issuer. + type: boolean + organization: + description: Organization defines whether to add the organization + information into the issuer. + type: boolean + province: + description: Province defines whether to add the province + information into the issuer. + type: boolean + serialNumber: + description: SerialNumber defines whether to add the serialNumber + information into the issuer. 
+ type: boolean + type: object + notAfter: + description: NotAfter defines whether to add the Not After + information from the Validity part. + type: boolean + notBefore: + description: NotBefore defines whether to add the Not Before + information from the Validity part. + type: boolean + sans: + description: Sans defines whether to add the Subject Alternative + Name information from the Subject Alternative Name part. + type: boolean + serialNumber: + description: SerialNumber defines whether to add the client + serialNumber information. + type: boolean + subject: + description: Subject defines the client certificate subject + details to add to the X-Forwarded-Tls-Client-Cert-Info header. + properties: + commonName: + description: CommonName defines whether to add the organizationalUnit + information into the subject. + type: boolean + country: + description: Country defines whether to add the country + information into the subject. + type: boolean + domainComponent: + description: DomainComponent defines whether to add the + domainComponent information into the subject. + type: boolean + locality: + description: Locality defines whether to add the locality + information into the subject. + type: boolean + organization: + description: Organization defines whether to add the organization + information into the subject. + type: boolean + organizationalUnit: + description: OrganizationalUnit defines whether to add + the organizationalUnit information into the subject. + type: boolean + province: + description: Province defines whether to add the province + information into the subject. + type: boolean + serialNumber: + description: SerialNumber defines whether to add the serialNumber + information into the subject. + type: boolean + type: object + type: object + pem: + description: PEM sets the X-Forwarded-Tls-Client-Cert header with + the certificate. 
+ type: boolean + type: object + plugin: + additionalProperties: + x-kubernetes-preserve-unknown-fields: true + description: |- + Plugin defines the middleware plugin configuration. + More info: https://doc.traefik.io/traefik/plugins/ + type: object + rateLimit: + description: |- + RateLimit holds the rate limit configuration. + This middleware ensures that services will receive a fair amount of requests, and allows one to define what fair is. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ratelimit/ + properties: + average: + description: |- + Average is the maximum rate, by default in requests/s, allowed for the given source. + It defaults to 0, which means no rate limiting. + The rate is actually defined by dividing Average by Period. So for a rate below 1req/s, + one needs to define a Period larger than a second. + format: int64 + type: integer + burst: + description: |- + Burst is the maximum number of requests allowed to arrive in the same arbitrarily small period of time. + It defaults to 1. + format: int64 + type: integer + period: + anyOf: + - type: integer + - type: string + description: |- + Period, in combination with Average, defines the actual maximum rate, such as: + r = Average / Period. It defaults to a second. + x-kubernetes-int-or-string: true + sourceCriterion: + description: |- + SourceCriterion defines what criterion is used to group requests as originating from a common source. + If several strategies are defined at the same time, an error will be raised. + If none are set, the default is to use the request's remote address field (as an ipStrategy). + properties: + ipStrategy: + description: |- + IPStrategy holds the IP strategy configuration used by Traefik to determine the client IP. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/ipallowlist/#ipstrategy + properties: + depth: + description: Depth tells Traefik to use the X-Forwarded-For + header and take the IP located at the depth position + (starting from the right). + type: integer + excludedIPs: + description: ExcludedIPs configures Traefik to scan the + X-Forwarded-For header and select the first IP not in + the list. + items: + type: string + type: array + ipv6Subnet: + description: IPv6Subnet configures Traefik to consider + all IPv6 addresses from the defined subnet as originating + from the same IP. Applies to RemoteAddrStrategy and + DepthStrategy. + type: integer + type: object + requestHeaderName: + description: RequestHeaderName defines the name of the header + used to group incoming requests. + type: string + requestHost: + description: RequestHost defines whether to consider the request + Host as the source. + type: boolean + type: object + type: object + redirectRegex: + description: |- + RedirectRegex holds the redirect regex middleware configuration. + This middleware redirects a request using regex matching and replacement. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/redirectregex/#regex + properties: + permanent: + description: Permanent defines whether the redirection is permanent + (301). + type: boolean + regex: + description: Regex defines the regex used to match and capture + elements from the request URL. + type: string + replacement: + description: Replacement defines how to modify the URL to have + the new target URL. + type: string + type: object + redirectScheme: + description: |- + RedirectScheme holds the redirect scheme middleware configuration. + This middleware redirects requests from a scheme/port to another. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/redirectscheme/ + properties: + permanent: + description: Permanent defines whether the redirection is permanent + (301). 
+ type: boolean + port: + description: Port defines the port of the new URL. + type: string + scheme: + description: Scheme defines the scheme of the new URL. + type: string + type: object + replacePath: + description: |- + ReplacePath holds the replace path middleware configuration. + This middleware replaces the path of the request URL and store the original path in an X-Replaced-Path header. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/replacepath/ + properties: + path: + description: Path defines the path to use as replacement in the + request URL. + type: string + type: object + replacePathRegex: + description: |- + ReplacePathRegex holds the replace path regex middleware configuration. + This middleware replaces the path of a URL using regex matching and replacement. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/replacepathregex/ + properties: + regex: + description: Regex defines the regular expression used to match + and capture the path from the request URL. + type: string + replacement: + description: Replacement defines the replacement path format, + which can include captured variables. + type: string + type: object + retry: + description: |- + Retry holds the retry middleware configuration. + This middleware reissues requests a given number of times to a backend server if that server does not reply. + As soon as the server answers, the middleware stops retrying, regardless of the response status. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/retry/ + properties: + attempts: + description: Attempts defines how many times the request should + be retried. + type: integer + initialInterval: + anyOf: + - type: integer + - type: string + description: |- + InitialInterval defines the first wait time in the exponential backoff series. + The maximum interval is calculated as twice the initialInterval. + If unspecified, requests will be retried immediately. 
+ The value of initialInterval should be provided in seconds or as a valid duration format, + see https://pkg.go.dev/time#ParseDuration. + x-kubernetes-int-or-string: true + type: object + stripPrefix: + description: |- + StripPrefix holds the strip prefix middleware configuration. + This middleware removes the specified prefixes from the URL path. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/stripprefix/ + properties: + forceSlash: + description: |- + Deprecated: ForceSlash option is deprecated, please remove any usage of this option. + ForceSlash ensures that the resulting stripped path is not the empty string, by replacing it with / when necessary. + Default: true. + type: boolean + prefixes: + description: Prefixes defines the prefixes to strip from the request + URL. + items: + type: string + type: array + type: object + stripPrefixRegex: + description: |- + StripPrefixRegex holds the strip prefix regex middleware configuration. + This middleware removes the matching prefixes from the URL path. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/http/stripprefixregex/ + properties: + regex: + description: Regex defines the regular expression to match the + path prefix from the request URL. + items: + type: string + type: array + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: middlewaretcps.traefik.io +spec: + group: traefik.io + names: + kind: MiddlewareTCP + listKind: MiddlewareTCPList + plural: middlewaretcps + singular: middlewaretcp + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + MiddlewareTCP is the CRD implementation of a Traefik TCP middleware. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/overview/ + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: MiddlewareTCPSpec defines the desired state of a MiddlewareTCP. + properties: + inFlightConn: + description: InFlightConn defines the InFlightConn middleware configuration. + properties: + amount: + description: |- + Amount defines the maximum amount of allowed simultaneous connections. + The middleware closes the connection if there are already amount connections opened. + format: int64 + type: integer + type: object + ipAllowList: + description: |- + IPAllowList defines the IPAllowList middleware configuration. + This middleware accepts/refuses connections based on the client IP. + More info: https://doc.traefik.io/traefik/v3.3/middlewares/tcp/ipallowlist/ + properties: + sourceRange: + description: SourceRange defines the allowed IPs (or ranges of + allowed IPs by using CIDR notation). + items: + type: string + type: array + type: object + ipWhiteList: + description: |- + IPWhiteList defines the IPWhiteList middleware configuration. + This middleware accepts/refuses connections based on the client IP. + Deprecated: please use IPAllowList instead. 
+ More info: https://doc.traefik.io/traefik/v3.3/middlewares/tcp/ipwhitelist/ + properties: + sourceRange: + description: SourceRange defines the allowed IPs (or ranges of + allowed IPs by using CIDR notation). + items: + type: string + type: array + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: serverstransports.traefik.io +spec: + group: traefik.io + names: + kind: ServersTransport + listKind: ServersTransportList + plural: serverstransports + singular: serverstransport + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServersTransport is the CRD implementation of a ServersTransport. + If no serversTransport is specified, the default@internal will be used. + The default@internal serversTransport is created from the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#serverstransport_1 + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ServersTransportSpec defines the desired state of a ServersTransport. 
+ properties: + certificatesSecrets: + description: CertificatesSecrets defines a list of secret storing + client certificates for mTLS. + items: + type: string + type: array + disableHTTP2: + description: DisableHTTP2 disables HTTP/2 for connections with backend + servers. + type: boolean + forwardingTimeouts: + description: ForwardingTimeouts defines the timeouts for requests + forwarded to the backend servers. + properties: + dialTimeout: + anyOf: + - type: integer + - type: string + description: DialTimeout is the amount of time to wait until a + connection to a backend server can be established. + x-kubernetes-int-or-string: true + idleConnTimeout: + anyOf: + - type: integer + - type: string + description: IdleConnTimeout is the maximum period for which an + idle HTTP keep-alive connection will remain open before closing + itself. + x-kubernetes-int-or-string: true + pingTimeout: + anyOf: + - type: integer + - type: string + description: PingTimeout is the timeout after which the HTTP/2 + connection will be closed if a response to ping is not received. + x-kubernetes-int-or-string: true + readIdleTimeout: + anyOf: + - type: integer + - type: string + description: ReadIdleTimeout is the timeout after which a health + check using ping frame will be carried out if no frame is received + on the HTTP/2 connection. + x-kubernetes-int-or-string: true + responseHeaderTimeout: + anyOf: + - type: integer + - type: string + description: ResponseHeaderTimeout is the amount of time to wait + for a server's response headers after fully writing the request + (including its body, if any). + x-kubernetes-int-or-string: true + type: object + insecureSkipVerify: + description: InsecureSkipVerify disables SSL certificate verification. + type: boolean + maxIdleConnsPerHost: + description: MaxIdleConnsPerHost controls the maximum idle (keep-alive) + to keep per-host. 
+ type: integer + peerCertURI: + description: PeerCertURI defines the peer cert URI used to match against + SAN URI during the peer certificate verification. + type: string + rootCAsSecrets: + description: RootCAsSecrets defines a list of CA secret used to validate + self-signed certificate. + items: + type: string + type: array + serverName: + description: ServerName defines the server name used to contact the + server. + type: string + spiffe: + description: Spiffe defines the SPIFFE configuration. + properties: + ids: + description: IDs defines the allowed SPIFFE IDs (takes precedence + over the SPIFFE TrustDomain). + items: + type: string + type: array + trustDomain: + description: TrustDomain defines the allowed SPIFFE trust domain. + type: string + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: serverstransporttcps.traefik.io +spec: + group: traefik.io + names: + kind: ServersTransportTCP + listKind: ServersTransportTCPList + plural: serverstransporttcps + singular: serverstransporttcp + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServersTransportTCP is the CRD implementation of a TCPServersTransport. + If no tcpServersTransport is specified, a default one named default@internal will be used. + The default@internal tcpServersTransport can be configured in the static configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#serverstransport_3 + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ServersTransportTCPSpec defines the desired state of a ServersTransportTCP. + properties: + dialKeepAlive: + anyOf: + - type: integer + - type: string + description: DialKeepAlive is the interval between keep-alive probes + for an active network connection. If zero, keep-alive probes are + sent with a default value (currently 15 seconds), if supported by + the protocol and operating system. Network protocols or operating + systems that do not support keep-alives ignore this field. If negative, + keep-alive probes are disabled. + x-kubernetes-int-or-string: true + dialTimeout: + anyOf: + - type: integer + - type: string + description: DialTimeout is the amount of time to wait until a connection + to a backend server can be established. + x-kubernetes-int-or-string: true + terminationDelay: + anyOf: + - type: integer + - type: string + description: TerminationDelay defines the delay to wait before fully + terminating the connection, after one connected peer has closed + its writing capability. + x-kubernetes-int-or-string: true + tls: + description: TLS defines the TLS configuration + properties: + certificatesSecrets: + description: CertificatesSecrets defines a list of secret storing + client certificates for mTLS. + items: + type: string + type: array + insecureSkipVerify: + description: InsecureSkipVerify disables TLS certificate verification. 
+                    type: boolean
+                  peerCertURI:
+                    description: |-
+                      PeerCertURI defines the peer cert URI used to match against SAN URI during the peer certificate verification.
+                    type: string
+                  rootCAsSecrets:
+                    description: RootCAsSecrets defines a list of CA secret used to
+                      validate self-signed certificates.
+                    items:
+                      type: string
+                    type: array
+                  serverName:
+                    description: ServerName defines the server name used to contact
+                      the server.
+                    type: string
+                  spiffe:
+                    description: Spiffe defines the SPIFFE configuration.
+                    properties:
+                      ids:
+                        description: IDs defines the allowed SPIFFE IDs (takes precedence
+                          over the SPIFFE TrustDomain).
+                        items:
+                          type: string
+                        type: array
+                      trustDomain:
+                        description: TrustDomain defines the allowed SPIFFE trust
+                          domain.
+                        type: string
+                    type: object
+                type: object
+            type: object
+        required:
+        - metadata
+        - spec
+        type: object
+    served: true
+    storage: true
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.1
+  name: tlsoptions.traefik.io
+spec:
+  group: traefik.io
+  names:
+    kind: TLSOption
+    listKind: TLSOptionList
+    plural: tlsoptions
+    singular: tlsoption
+  scope: Namespaced
+  versions:
+  - name: v1alpha1
+    schema:
+      openAPIV3Schema:
+        description: |-
+          TLSOption is the CRD implementation of a Traefik TLS Option, allowing to configure some parameters of the TLS connection.
+          More info: https://doc.traefik.io/traefik/v3.3/https/tls/#tls-options
+        properties:
+          apiVersion:
+            description: |-
+              APIVersion defines the versioned schema of this representation of an object.
+              Servers should convert recognized schemas to the latest internal value, and
+              may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TLSOptionSpec defines the desired state of a TLSOption. + properties: + alpnProtocols: + description: |- + ALPNProtocols defines the list of supported application level protocols for the TLS handshake, in order of preference. + More info: https://doc.traefik.io/traefik/v3.3/https/tls/#alpn-protocols + items: + type: string + type: array + cipherSuites: + description: |- + CipherSuites defines the list of supported cipher suites for TLS versions up to TLS 1.2. + More info: https://doc.traefik.io/traefik/v3.3/https/tls/#cipher-suites + items: + type: string + type: array + clientAuth: + description: ClientAuth defines the server's policy for TLS Client + Authentication. + properties: + clientAuthType: + description: ClientAuthType defines the client authentication + type to apply. + enum: + - NoClientCert + - RequestClientCert + - RequireAnyClientCert + - VerifyClientCertIfGiven + - RequireAndVerifyClientCert + type: string + secretNames: + description: SecretNames defines the names of the referenced Kubernetes + Secret storing certificate details. + items: + type: string + type: array + type: object + curvePreferences: + description: |- + CurvePreferences defines the preferred elliptic curves in a specific order. + More info: https://doc.traefik.io/traefik/v3.3/https/tls/#curve-preferences + items: + type: string + type: array + maxVersion: + description: |- + MaxVersion defines the maximum TLS version that Traefik will accept. 
+ Possible values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13. + Default: None. + type: string + minVersion: + description: |- + MinVersion defines the minimum TLS version that Traefik will accept. + Possible values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13. + Default: VersionTLS10. + type: string + preferServerCipherSuites: + description: |- + PreferServerCipherSuites defines whether the server chooses a cipher suite among his own instead of among the client's. + It is enabled automatically when minVersion or maxVersion is set. + Deprecated: https://github.com/golang/go/issues/45430 + type: boolean + sniStrict: + description: SniStrict defines whether Traefik allows connections + from clients connections that do not specify a server_name extension. + type: boolean + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: tlsstores.traefik.io +spec: + group: traefik.io + names: + kind: TLSStore + listKind: TLSStoreList + plural: tlsstores + singular: tlsstore + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + TLSStore is the CRD implementation of a Traefik TLS Store. + For the time being, only the TLSStore named default is supported. + This means that you cannot have two stores that are named default in different Kubernetes namespaces. + More info: https://doc.traefik.io/traefik/v3.3/https/tls/#certificates-stores + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TLSStoreSpec defines the desired state of a TLSStore. + properties: + certificates: + description: Certificates is a list of secret names, each secret holding + a key/certificate pair to add to the store. + items: + description: Certificate holds a secret name for the TLSStore resource. + properties: + secretName: + description: SecretName is the name of the referenced Kubernetes + Secret to specify the certificate details. + type: string + required: + - secretName + type: object + type: array + defaultCertificate: + description: DefaultCertificate defines the default certificate configuration. + properties: + secretName: + description: SecretName is the name of the referenced Kubernetes + Secret to specify the certificate details. + type: string + required: + - secretName + type: object + defaultGeneratedCert: + description: DefaultGeneratedCert defines the default generated certificate + configuration. + properties: + domain: + description: Domain is the domain definition for the DefaultCertificate. + properties: + main: + description: Main defines the main domain name. + type: string + sans: + description: SANs defines the subject alternative domain names. + items: + type: string + type: array + type: object + resolver: + description: Resolver is the name of the resolver that will be + used to issue the DefaultCertificate. 
+ type: string + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.16.1 + name: traefikservices.traefik.io +spec: + group: traefik.io + names: + kind: TraefikService + listKind: TraefikServiceList + plural: traefikservices + singular: traefikservice + scope: Namespaced + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + TraefikService is the CRD implementation of a Traefik Service. + TraefikService object allows to: + - Apply weight to Services on load-balancing + - Mirror traffic on services + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#kind-traefikservice + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: TraefikServiceSpec defines the desired state of a TraefikService. + properties: + mirroring: + description: Mirroring defines the Mirroring service configuration. + properties: + healthCheck: + description: Healthcheck defines health checks for ExternalName + services. + properties: + followRedirects: + description: |- + FollowRedirects defines whether redirects should be followed during the health check calls. 
+ Default: true + type: boolean + headers: + additionalProperties: + type: string + description: Headers defines custom headers to be sent to + the health check endpoint. + type: object + hostname: + description: Hostname defines the value of hostname in the + Host header of the health check request. + type: string + interval: + anyOf: + - type: integer + - type: string + description: |- + Interval defines the frequency of the health check calls. + Default: 30s + x-kubernetes-int-or-string: true + method: + description: Method defines the healthcheck method. + type: string + mode: + description: |- + Mode defines the health check mode. + If defined to grpc, will use the gRPC health check protocol to probe the server. + Default: http + type: string + path: + description: Path defines the server URL path for the health + check endpoint. + type: string + port: + description: Port defines the server URL port for the health + check endpoint. + type: integer + scheme: + description: Scheme replaces the server URL scheme for the + health check endpoint. + type: string + status: + description: Status defines the expected HTTP status code + of the response to the health check request. + type: integer + timeout: + anyOf: + - type: integer + - type: string + description: |- + Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy. + Default: 5s + x-kubernetes-int-or-string: true + type: object + kind: + description: Kind defines the kind of the Service. + enum: + - Service + - TraefikService + type: string + maxBodySize: + description: |- + MaxBodySize defines the maximum size allowed for the body of the request. + If the body is larger, the request is not mirrored. + Default value is -1, which means unlimited size. + format: int64 + type: integer + mirrorBody: + description: |- + MirrorBody defines whether the body of the request should be mirrored. + Default value is true. 
+ type: boolean + mirrors: + description: Mirrors defines the list of mirrors where Traefik + will duplicate the traffic. + items: + description: MirrorService holds the mirror configuration. + properties: + healthCheck: + description: Healthcheck defines health checks for ExternalName + services. + properties: + followRedirects: + description: |- + FollowRedirects defines whether redirects should be followed during the health check calls. + Default: true + type: boolean + headers: + additionalProperties: + type: string + description: Headers defines custom headers to be sent + to the health check endpoint. + type: object + hostname: + description: Hostname defines the value of hostname + in the Host header of the health check request. + type: string + interval: + anyOf: + - type: integer + - type: string + description: |- + Interval defines the frequency of the health check calls. + Default: 30s + x-kubernetes-int-or-string: true + method: + description: Method defines the healthcheck method. + type: string + mode: + description: |- + Mode defines the health check mode. + If defined to grpc, will use the gRPC health check protocol to probe the server. + Default: http + type: string + path: + description: Path defines the server URL path for the + health check endpoint. + type: string + port: + description: Port defines the server URL port for the + health check endpoint. + type: integer + scheme: + description: Scheme replaces the server URL scheme for + the health check endpoint. + type: string + status: + description: Status defines the expected HTTP status + code of the response to the health check request. + type: integer + timeout: + anyOf: + - type: integer + - type: string + description: |- + Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy. + Default: 5s + x-kubernetes-int-or-string: true + type: object + kind: + description: Kind defines the kind of the Service. 
+ enum: + - Service + - TraefikService + type: string + name: + description: |- + Name defines the name of the referenced Kubernetes Service or TraefikService. + The differentiation between the two is specified in the Kind field. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service or TraefikService. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. + type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + passHostHeader: + description: |- + PassHostHeader defines whether the client Host header is forwarded to the upstream Kubernetes Service. + By default, passHostHeader is true. + type: boolean + percent: + description: |- + Percent defines the part of the traffic to mirror. + Supported values: 0 to 100. + type: integer + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + responseForwarding: + description: ResponseForwarding defines how Traefik forwards + the response from the upstream Kubernetes Service to the + client. + properties: + flushInterval: + description: |- + FlushInterval defines the interval, in milliseconds, in between flushes to the client while copying the response body. 
+ A negative value means to flush immediately after each write to the client. + This configuration is ignored when ReverseProxy recognizes a response as a streaming response; + for such responses, writes are flushed to the client immediately. + Default: 100ms + type: string + type: object + scheme: + description: |- + Scheme defines the scheme to use for the request to the upstream Kubernetes Service. + It defaults to https when Kubernetes Service port is 443, http otherwise. + type: string + serversTransport: + description: |- + ServersTransport defines the name of ServersTransport resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. + type: string + sticky: + description: |- + Sticky defines the sticky sessions configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#sticky-sessions + properties: + cookie: + description: Cookie defines the sticky cookie configuration. + properties: + httpOnly: + description: HTTPOnly defines whether the cookie + can be accessed by client-side APIs, such as JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. + When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. + type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. 
+ More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie can + only be transmitted over an encrypted connection + (i.e. HTTPS). + type: boolean + type: object + type: object + strategy: + description: |- + Strategy defines the load balancing strategy between the servers. + RoundRobin is the only supported value at the moment. + type: string + weight: + description: |- + Weight defines the weight and should only be specified when Name references a TraefikService object + (and to be precise, one that embeds a Weighted Round Robin). + type: integer + required: + - name + type: object + type: array + name: + description: |- + Name defines the name of the referenced Kubernetes Service or TraefikService. + The differentiation between the two is specified in the Kind field. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service or TraefikService. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. + type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + passHostHeader: + description: |- + PassHostHeader defines whether the client Host header is forwarded to the upstream Kubernetes Service. + By default, passHostHeader is true. 
+ type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + responseForwarding: + description: ResponseForwarding defines how Traefik forwards the + response from the upstream Kubernetes Service to the client. + properties: + flushInterval: + description: |- + FlushInterval defines the interval, in milliseconds, in between flushes to the client while copying the response body. + A negative value means to flush immediately after each write to the client. + This configuration is ignored when ReverseProxy recognizes a response as a streaming response; + for such responses, writes are flushed to the client immediately. + Default: 100ms + type: string + type: object + scheme: + description: |- + Scheme defines the scheme to use for the request to the upstream Kubernetes Service. + It defaults to https when Kubernetes Service port is 443, http otherwise. + type: string + serversTransport: + description: |- + ServersTransport defines the name of ServersTransport resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. + type: string + sticky: + description: |- + Sticky defines the sticky sessions configuration. + More info: https://doc.traefik.io/traefik/v3.3/routing/services/#sticky-sessions + properties: + cookie: + description: Cookie defines the sticky cookie configuration. + properties: + httpOnly: + description: HTTPOnly defines whether the cookie can be + accessed by client-side APIs, such as JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. + When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. 
+ type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie can only + be transmitted over an encrypted connection (i.e. HTTPS). + type: boolean + type: object + type: object + strategy: + description: |- + Strategy defines the load balancing strategy between the servers. + RoundRobin is the only supported value at the moment. + type: string + weight: + description: |- + Weight defines the weight and should only be specified when Name references a TraefikService object + (and to be precise, one that embeds a Weighted Round Robin). + type: integer + required: + - name + type: object + weighted: + description: Weighted defines the Weighted Round Robin configuration. + properties: + services: + description: Services defines the list of Kubernetes Service and/or + TraefikService to load-balance, with weight. + items: + description: Service defines an upstream HTTP service to proxy + traffic to. + properties: + healthCheck: + description: Healthcheck defines health checks for ExternalName + services. + properties: + followRedirects: + description: |- + FollowRedirects defines whether redirects should be followed during the health check calls. + Default: true + type: boolean + headers: + additionalProperties: + type: string + description: Headers defines custom headers to be sent + to the health check endpoint. + type: object + hostname: + description: Hostname defines the value of hostname + in the Host header of the health check request. 
+ type: string + interval: + anyOf: + - type: integer + - type: string + description: |- + Interval defines the frequency of the health check calls. + Default: 30s + x-kubernetes-int-or-string: true + method: + description: Method defines the healthcheck method. + type: string + mode: + description: |- + Mode defines the health check mode. + If defined to grpc, will use the gRPC health check protocol to probe the server. + Default: http + type: string + path: + description: Path defines the server URL path for the + health check endpoint. + type: string + port: + description: Port defines the server URL port for the + health check endpoint. + type: integer + scheme: + description: Scheme replaces the server URL scheme for + the health check endpoint. + type: string + status: + description: Status defines the expected HTTP status + code of the response to the health check request. + type: integer + timeout: + anyOf: + - type: integer + - type: string + description: |- + Timeout defines the maximum duration Traefik will wait for a health check request before considering the server unhealthy. + Default: 5s + x-kubernetes-int-or-string: true + type: object + kind: + description: Kind defines the kind of the Service. + enum: + - Service + - TraefikService + type: string + name: + description: |- + Name defines the name of the referenced Kubernetes Service or TraefikService. + The differentiation between the two is specified in the Kind field. + type: string + namespace: + description: Namespace defines the namespace of the referenced + Kubernetes Service or TraefikService. + type: string + nativeLB: + description: |- + NativeLB controls, when creating the load-balancer, + whether the LB's children are directly the pods IPs or if the only child is the Kubernetes Service clusterIP. + The Kubernetes Service itself does load-balance to the pods. + By default, NativeLB is false. 
+ type: boolean + nodePortLB: + description: |- + NodePortLB controls, when creating the load-balancer, + whether the LB's children are directly the nodes internal IPs using the nodePort when the service type is NodePort. + It allows services to be reachable when Traefik runs externally from the Kubernetes cluster but within the same network of the nodes. + By default, NodePortLB is false. + type: boolean + passHostHeader: + description: |- + PassHostHeader defines whether the client Host header is forwarded to the upstream Kubernetes Service. + By default, passHostHeader is true. + type: boolean + port: + anyOf: + - type: integer + - type: string + description: |- + Port defines the port of a Kubernetes Service. + This can be a reference to a named port. + x-kubernetes-int-or-string: true + responseForwarding: + description: ResponseForwarding defines how Traefik forwards + the response from the upstream Kubernetes Service to the + client. + properties: + flushInterval: + description: |- + FlushInterval defines the interval, in milliseconds, in between flushes to the client while copying the response body. + A negative value means to flush immediately after each write to the client. + This configuration is ignored when ReverseProxy recognizes a response as a streaming response; + for such responses, writes are flushed to the client immediately. + Default: 100ms + type: string + type: object + scheme: + description: |- + Scheme defines the scheme to use for the request to the upstream Kubernetes Service. + It defaults to https when Kubernetes Service port is 443, http otherwise. + type: string + serversTransport: + description: |- + ServersTransport defines the name of ServersTransport resource to use. + It allows to configure the transport between Traefik and your servers. + Can only be used on a Kubernetes Service. + type: string + sticky: + description: |- + Sticky defines the sticky sessions configuration. 
+ More info: https://doc.traefik.io/traefik/v3.3/routing/services/#sticky-sessions + properties: + cookie: + description: Cookie defines the sticky cookie configuration. + properties: + httpOnly: + description: HTTPOnly defines whether the cookie + can be accessed by client-side APIs, such as JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. + When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. + type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie can + only be transmitted over an encrypted connection + (i.e. HTTPS). + type: boolean + type: object + type: object + strategy: + description: |- + Strategy defines the load balancing strategy between the servers. + RoundRobin is the only supported value at the moment. + type: string + weight: + description: |- + Weight defines the weight and should only be specified when Name references a TraefikService object + (and to be precise, one that embeds a Weighted Round Robin). + type: integer + required: + - name + type: object + type: array + sticky: + description: |- + Sticky defines whether sticky sessions are enabled. + More info: https://doc.traefik.io/traefik/v3.3/routing/providers/kubernetes-crd/#stickiness-and-load-balancing + properties: + cookie: + description: Cookie defines the sticky cookie configuration. 
+ properties: + httpOnly: + description: HTTPOnly defines whether the cookie can be + accessed by client-side APIs, such as JavaScript. + type: boolean + maxAge: + description: |- + MaxAge defines the number of seconds until the cookie expires. + When set to a negative number, the cookie expires immediately. + When set to zero, the cookie never expires. + type: integer + name: + description: Name defines the Cookie name. + type: string + path: + description: |- + Path defines the path that must exist in the requested URL for the browser to send the Cookie header. + When not provided the cookie will be sent on every request to the domain. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie#pathpath-value + type: string + sameSite: + description: |- + SameSite defines the same site policy. + More info: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie/SameSite + type: string + secure: + description: Secure defines whether the cookie can only + be transmitted over an encrypted connection (i.e. HTTPS). 
+ type: boolean + type: object + type: object + type: object + type: object + required: + - metadata + - spec + type: object + served: true + storage: true diff --git a/infrastructure/traefik/deployment.yaml b/infrastructure/traefik/deployment.yaml index a34307a..600a504 100644 --- a/infrastructure/traefik/deployment.yaml +++ b/infrastructure/traefik/deployment.yaml @@ -27,6 +27,8 @@ items: creationTimestamp: null labels: app: traefik + app.kubernetes.io/instance: traefik-kube-system + app.kubernetes.io/name: traefik spec: containers: - args: diff --git a/infrastructure/traefik/kustomization.yaml b/infrastructure/traefik/kustomization.yaml index 4e36574..6abf485 100644 --- a/infrastructure/traefik/kustomization.yaml +++ b/infrastructure/traefik/kustomization.yaml @@ -5,6 +5,7 @@ metadata: name: traefik namespace: flux-system resources: + - crds.yaml - deployment.yaml - serviceaccount.yaml - clusterrole.yaml diff --git a/infrastructure/traefik/traefik-service-lb.yaml b/infrastructure/traefik/traefik-service-lb.yaml index e4929f1..839a55e 100644 --- a/infrastructure/traefik/traefik-service-lb.yaml +++ b/infrastructure/traefik/traefik-service-lb.yaml @@ -3,9 +3,10 @@ apiVersion: v1 kind: Service metadata: name: traefik - namespace: kube-system + namespace: traefik annotations: metallb.universe.tf/address-pool: communication-pool + metallb.universe.tf/allow-shared-ip: traefik spec: type: LoadBalancer loadBalancerClass: metallb @@ -20,5 +21,4 @@ spec: targetPort: websecure protocol: TCP selector: - app.kubernetes.io/instance: traefik-kube-system - app.kubernetes.io/name: traefik + app: traefik diff --git a/infrastructure/vault-injector/helmrelease.yaml b/infrastructure/vault-injector/helmrelease.yaml new file mode 100644 index 0000000..6a9c0fd --- /dev/null +++ b/infrastructure/vault-injector/helmrelease.yaml @@ -0,0 +1,43 @@ +# infrastructure/vault-injector/helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: vault-injector + 
namespace: vault +spec: + interval: 30m + chart: + spec: + chart: vault + version: 0.31.0 + sourceRef: + kind: HelmRepository + name: hashicorp + namespace: flux-system + install: + remediation: { retries: 3 } + timeout: 10m + upgrade: + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 10m + values: + global: + externalVaultAddr: http://vault.vault.svc.cluster.local:8200 + tlsDisable: true + server: + enabled: false + csi: + enabled: false + injector: + enabled: true + replicas: 1 + agentImage: + repository: hashicorp/vault + tag: "1.17.6" + webhook: + failurePolicy: Ignore + nodeSelector: + node-role.kubernetes.io/worker: "true" diff --git a/infrastructure/vault-injector/kustomization.yaml b/infrastructure/vault-injector/kustomization.yaml new file mode 100644 index 0000000..b4db089 --- /dev/null +++ b/infrastructure/vault-injector/kustomization.yaml @@ -0,0 +1,5 @@ +# infrastructure/vault-injector/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - helmrelease.yaml diff --git a/knowledge/catalog/atlas-summary.json b/knowledge/catalog/atlas-summary.json index 2139e29..fa35051 100644 --- a/knowledge/catalog/atlas-summary.json +++ b/knowledge/catalog/atlas-summary.json @@ -1,8 +1,8 @@ { "counts": { - "helmrelease_host_hints": 7, - "http_endpoints": 35, - "services": 44, - "workloads": 49 + "helmrelease_host_hints": 17, + "http_endpoints": 37, + "services": 43, + "workloads": 54 } } diff --git a/knowledge/catalog/atlas.json b/knowledge/catalog/atlas.json index 92f08f4..0d97bcd 100644 --- a/knowledge/catalog/atlas.json +++ b/knowledge/catalog/atlas.json @@ -12,12 +12,7 @@ "targetNamespace": "bstein-dev-home" }, { - "name": "ci-demo", - "path": "services/ci-demo", - "targetNamespace": null - }, - { - "name": "communication", + "name": "comms", "path": "services/comms", "targetNamespace": "comms" }, @@ -71,6 +66,11 @@ "path": "services/keycloak", "targetNamespace": "sso" }, + { + 
"name": "logging", + "path": "services/logging", + "targetNamespace": null + }, { "name": "longhorn-ui", "path": "infrastructure/longhorn/ui-ingress", @@ -81,6 +81,11 @@ "path": "services/mailu", "targetNamespace": "mailu-mailserver" }, + { + "name": "maintenance", + "path": "services/maintenance", + "targetNamespace": null + }, { "name": "metallb", "path": "infrastructure/metallb", @@ -116,11 +121,26 @@ "path": "services/openldap", "targetNamespace": "sso" }, + { + "name": "outline", + "path": "services/outline", + "targetNamespace": "outline" + }, { "name": "pegasus", "path": "services/pegasus", "targetNamespace": "jellyfin" }, + { + "name": "planka", + "path": "services/planka", + "targetNamespace": "planka" + }, + { + "name": "postgres", + "path": "infrastructure/postgres", + "targetNamespace": "postgres" + }, { "name": "sui-metrics", "path": "services/sui-metrics/overlays/atlas", @@ -163,7 +183,7 @@ "serviceAccountName": null, "nodeSelector": {}, "images": [ - "ollama/ollama:latest" + "ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d" ] }, { @@ -179,7 +199,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84" + "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92" ] }, { @@ -195,7 +215,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84" + "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92" ] }, { @@ -214,21 +234,6 @@ "python:3.11-slim" ] }, - { - "kind": "Deployment", - "namespace": "ci-demo", - "name": "ci-demo", - "labels": { - "app.kubernetes.io/name": "ci-demo" - }, - "serviceAccountName": null, - "nodeSelector": { - "hardware": "rpi4" - }, - "images": [ - "registry.bstein.dev/infra/ci-demo:v0.0.0-3" - ] - }, { "kind": "Deployment", "namespace": "comms", @@ -271,7 +276,7 @@ "hardware": "rpi5" }, "images": [ - "ghcr.io/element-hq/element-call:latest" + 
"ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc" ] }, { @@ -345,56 +350,6 @@ "nginx:1.27-alpine" ] }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-element-element-web", - "labels": { - "app.kubernetes.io/instance": "othrys-element", - "app.kubernetes.io/name": "element-web" - }, - "serviceAccountName": "othrys-element-element-web", - "nodeSelector": { - "hardware": "rpi5" - }, - "images": [ - "ghcr.io/element-hq/element-web:v1.12.6" - ] - }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-synapse-matrix-synapse", - "labels": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "serviceAccountName": "default", - "nodeSelector": { - "hardware": "rpi5" - }, - "images": [ - "ghcr.io/element-hq/synapse:v1.144.0" - ] - }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-synapse-redis-master", - "labels": { - "app.kubernetes.io/component": "master", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/managed-by": "Helm", - "app.kubernetes.io/name": "redis", - "helm.sh/chart": "redis-17.17.1" - }, - "serviceAccountName": "othrys-synapse-redis", - "nodeSelector": {}, - "images": [ - "docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34" - ] - }, { "kind": "DaemonSet", "namespace": "crypto", @@ -407,7 +362,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "ghcr.io/tari-project/xmrig:latest" + "ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9" ] }, { @@ -681,6 +636,66 @@ "hashicorp/vault-csi-provider:1.7.0" ] }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-image-gc-rpi4", + "labels": { + "app": "node-image-gc-rpi4" + }, + "serviceAccountName": "node-image-gc-rpi4", + "nodeSelector": { + "hardware": "rpi4" + }, + "images": [ + 
"bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-image-prune-rpi5", + "labels": { + "app": "node-image-prune-rpi5" + }, + "serviceAccountName": "node-image-prune-rpi5", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-log-rotation", + "labels": { + "app": "node-log-rotation" + }, + "serviceAccountName": "node-log-rotation", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "Deployment", + "namespace": "logging", + "name": "oauth2-proxy-logs", + "labels": { + "app": "oauth2-proxy-logs" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, { "kind": "Deployment", "namespace": "longhorn-system", @@ -708,7 +723,7 @@ "mailu.bstein.dev/vip": "true" }, "images": [ - "lachlanevenson/k8s-kubectl:latest" + "registry.bstein.dev/bstein/kubectl:1.35.0" ] }, { @@ -726,37 +741,30 @@ }, { "kind": "DaemonSet", - "namespace": "metallb-system", - "name": "metallb-speaker", + "namespace": "maintenance", + "name": "node-image-sweeper", "labels": { - "app.kubernetes.io/component": "speaker", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" + "app": "node-image-sweeper" }, - "serviceAccountName": "metallb-speaker", + "serviceAccountName": "node-image-sweeper", "nodeSelector": { "kubernetes.io/os": "linux" }, "images": [ - "quay.io/frrouting/frr:10.4.1", - "quay.io/metallb/speaker:v0.15.3" + "python:3.12.9-alpine3.20" ] }, { - "kind": "Deployment", - "namespace": "metallb-system", - "name": "metallb-controller", + "kind": 
"DaemonSet", + "namespace": "maintenance", + "name": "node-nofile", "labels": { - "app.kubernetes.io/component": "controller", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" - }, - "serviceAccountName": "metallb-controller", - "nodeSelector": { - "kubernetes.io/os": "linux" + "app": "node-nofile" }, + "serviceAccountName": "node-nofile", + "nodeSelector": {}, "images": [ - "quay.io/metallb/controller:v0.15.3" + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" ] }, { @@ -772,6 +780,21 @@ "registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04" ] }, + { + "kind": "DaemonSet", + "namespace": "monitoring", + "name": "jetson-tegrastats-exporter", + "labels": { + "app": "jetson-tegrastats-exporter" + }, + "serviceAccountName": "default", + "nodeSelector": { + "jetson": "true" + }, + "images": [ + "python:3.10-slim" + ] + }, { "kind": "Deployment", "namespace": "monitoring", @@ -797,7 +820,7 @@ "hardware": "rpi5" }, "images": [ - "collabora/code:latest" + "collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26" ] }, { @@ -815,6 +838,66 @@ "nextcloud:29-apache" ] }, + { + "kind": "Deployment", + "namespace": "outline", + "name": "outline", + "labels": { + "app": "outline" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "outlinewiki/outline:1.2.0" + ] + }, + { + "kind": "Deployment", + "namespace": "outline", + "name": "outline-redis", + "labels": { + "app": "outline-redis" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "redis:7.4.1-alpine" + ] + }, + { + "kind": "Deployment", + "namespace": "planka", + "name": "planka", + "labels": { + "app": "planka" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "ghcr.io/plankanban/planka:2.0.0-rc.4" + ] + 
}, + { + "kind": "StatefulSet", + "namespace": "postgres", + "name": "postgres", + "labels": { + "app": "postgres" + }, + "serviceAccountName": "postgres-vault", + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "postgres:15" + ] + }, { "kind": "Deployment", "namespace": "sso", @@ -984,22 +1067,6 @@ } ] }, - { - "namespace": "ci-demo", - "name": "ci-demo", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/name": "ci-demo" - }, - "ports": [ - { - "name": "http", - "port": 80, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, { "namespace": "comms", "name": "coturn", @@ -1454,94 +1521,6 @@ } ] }, - { - "namespace": "comms", - "name": "othrys-element-element-web", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/instance": "othrys-element", - "app.kubernetes.io/name": "element-web" - }, - "ports": [ - { - "name": "http", - "port": 80, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-matrix-synapse", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "ports": [ - { - "name": "http", - "port": 8008, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-redis-headless", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "redis" - }, - "ports": [ - { - "name": "tcp-redis", - "port": 6379, - "targetPort": "redis", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-redis-master", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "master", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "redis" - }, - "ports": [ - { - "name": "tcp-redis", - "port": 6379, - "targetPort": "redis", - "protocol": "TCP" - } - ] - }, - { 
- "namespace": "comms", - "name": "othrys-synapse-replication", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "ports": [ - { - "name": "replication", - "port": 9093, - "targetPort": "replication", - "protocol": "TCP" - } - ] - }, { "namespace": "crypto", "name": "monerod", @@ -1743,6 +1722,22 @@ } ] }, + { + "namespace": "logging", + "name": "oauth2-proxy-logs", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy-logs" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, { "namespace": "longhorn-system", "name": "oauth2-proxy-longhorn", @@ -1823,24 +1818,6 @@ } ] }, - { - "namespace": "metallb-system", - "name": "metallb-webhook-service", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "controller", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" - }, - "ports": [ - { - "name": null, - "port": 443, - "targetPort": 9443, - "protocol": "TCP" - } - ] - }, { "namespace": "monitoring", "name": "dcgm-exporter", @@ -1857,6 +1834,22 @@ } ] }, + { + "namespace": "monitoring", + "name": "jetson-tegrastats-exporter", + "type": "ClusterIP", + "selector": { + "app": "jetson-tegrastats-exporter" + }, + "ports": [ + { + "name": "metrics", + "port": 9100, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, { "namespace": "monitoring", "name": "postmark-exporter", @@ -1905,6 +1898,70 @@ } ] }, + { + "namespace": "outline", + "name": "outline", + "type": "ClusterIP", + "selector": { + "app": "outline" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "outline", + "name": "outline-redis", + "type": "ClusterIP", + "selector": { + "app": "outline-redis" + }, + "ports": [ + { + "name": "redis", + "port": 6379, + "targetPort": "redis", + 
"protocol": "TCP" + } + ] + }, + { + "namespace": "planka", + "name": "planka", + "type": "ClusterIP", + "selector": { + "app": "planka" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "postgres", + "name": "postgres-service", + "type": "ClusterIP", + "selector": { + "app": "postgres" + }, + "ports": [ + { + "name": "postgres", + "port": 5432, + "targetPort": 5432, + "protocol": "TCP" + } + ] + }, { "namespace": "sso", "name": "keycloak", @@ -2110,7 +2167,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-bstein-dev", - "source": "communication" + "source": "comms" } }, { @@ -2130,7 +2187,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-bstein-dev", - "source": "communication" + "source": "comms" } }, { @@ -2170,7 +2227,7 @@ "via": { "kind": "Ingress", "name": "element-call", - "source": "communication" + "source": "comms" } }, { @@ -2250,7 +2307,7 @@ "via": { "kind": "Ingress", "name": "livekit-jwt-ingress", - "source": "communication" + "source": "comms" } }, { @@ -2270,27 +2327,7 @@ "via": { "kind": "Ingress", "name": "livekit-ingress", - "source": "communication" - } - }, - { - "host": "live.bstein.dev", - "path": "/", - "backend": { - "namespace": "comms", - "service": "othrys-element-element-web", - "port": 80, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-element-element-web" - } - ] - }, - "via": { - "kind": "Ingress", - "name": "othrys-element-element-web", - "source": "communication" + "source": "comms" } }, { @@ -2310,7 +2347,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown", - "source": "communication" + "source": "comms" } }, { @@ -2330,7 +2367,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown", - "source": "communication" + "source": "comms" } }, { @@ -2340,17 +2377,32 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, + "workloads": [] + }, + "via": { + "kind": "Ingress", + "name": 
"matrix-routing", + "source": "comms" + } + }, + { + "host": "logs.bstein.dev", + "path": "/", + "backend": { + "namespace": "logging", + "service": "oauth2-proxy-logs", + "port": "http", "workloads": [ { "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" + "name": "oauth2-proxy-logs" } ] }, "via": { "kind": "Ingress", - "name": "matrix-routing", - "source": "communication" + "name": "logs", + "source": "logging" } }, { @@ -2405,7 +2457,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2425,7 +2477,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-matrix-live", - "source": "communication" + "source": "comms" } }, { @@ -2445,7 +2497,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-matrix-live", - "source": "communication" + "source": "comms" } }, { @@ -2455,17 +2507,12 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" - } - ] + "workloads": [] }, "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2485,7 +2532,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2505,7 +2552,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2525,7 +2572,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2545,7 +2592,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2565,7 +2612,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2575,17 +2622,12 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, - "workloads": [ - { - "kind": "Deployment", - 
"name": "othrys-synapse-matrix-synapse" - } - ] + "workloads": [] }, "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2608,6 +2650,26 @@ "source": "monerod" } }, + { + "host": "notes.bstein.dev", + "path": "/", + "backend": { + "namespace": "outline", + "service": "outline", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "outline" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "outline", + "source": "outline" + } + }, { "host": "office.bstein.dev", "path": "/", @@ -2728,6 +2790,26 @@ "source": "jellyfin" } }, + { + "host": "tasks.bstein.dev", + "path": "/", + "backend": { + "namespace": "planka", + "service": "planka", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "planka" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "planka", + "source": "planka" + } + }, { "host": "vault.bstein.dev", "path": "/", @@ -2750,12 +2832,27 @@ } ], "helmrelease_host_hints": { + "comms:comms/othrys-element": [ + "call.live.bstein.dev", + "live.bstein.dev", + "matrix.live.bstein.dev" + ], + "comms:comms/othrys-synapse": [ + "bstein.dev", + "kit.live.bstein.dev", + "live.bstein.dev", + "matrix.live.bstein.dev", + "turn.live.bstein.dev" + ], "gitops-ui:flux-system/weave-gitops": [ "cd.bstein.dev" ], "harbor:harbor/harbor": [ "registry.bstein.dev" ], + "logging:logging/data-prepper": [ + "registry.bstein.dev" + ], "mailu:mailu-mailserver/mailu": [ "bstein.dev", "mail.bstein.dev" @@ -2764,6 +2861,7 @@ "alerts.bstein.dev" ], "monitoring:monitoring/grafana": [ + "bstein.dev", "metrics.bstein.dev", "sso.bstein.dev" ] diff --git a/knowledge/catalog/atlas.yaml b/knowledge/catalog/atlas.yaml index d628b7b..f3e04a8 100644 --- a/knowledge/catalog/atlas.yaml +++ b/knowledge/catalog/atlas.yaml @@ -1,3 +1,4 @@ +# knowledge/catalog/atlas.yaml # Generated by scripts/knowledge_render_atlas.py (do not edit by hand) cluster: atlas sources: @@ -7,7 +8,7 @@ sources: - name: 
bstein-dev-home path: services/bstein-dev-home targetNamespace: bstein-dev-home -- name: communication +- name: comms path: services/comms targetNamespace: comms - name: core @@ -40,12 +41,18 @@ sources: - name: keycloak path: services/keycloak targetNamespace: sso +- name: logging + path: services/logging + targetNamespace: null - name: longhorn-ui path: infrastructure/longhorn/ui-ingress targetNamespace: longhorn-system - name: mailu path: services/mailu targetNamespace: mailu-mailserver +- name: maintenance + path: services/maintenance + targetNamespace: null - name: metallb path: infrastructure/metallb targetNamespace: metallb-system @@ -67,9 +74,18 @@ sources: - name: openldap path: services/openldap targetNamespace: sso +- name: outline + path: services/outline + targetNamespace: outline - name: pegasus path: services/pegasus targetNamespace: jellyfin +- name: planka + path: services/planka + targetNamespace: planka +- name: postgres + path: infrastructure/postgres + targetNamespace: postgres - name: sui-metrics path: services/sui-metrics/overlays/atlas targetNamespace: sui-metrics @@ -97,7 +113,7 @@ workloads: serviceAccountName: null nodeSelector: {} images: - - ollama/ollama:latest + - ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d - kind: Deployment namespace: bstein-dev-home name: bstein-dev-home-backend @@ -108,7 +124,7 @@ workloads: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: 'true' images: - - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 + - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 - kind: Deployment namespace: bstein-dev-home name: bstein-dev-home-frontend @@ -119,7 +135,7 @@ workloads: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: 'true' images: - - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 + - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 - kind: Deployment namespace: bstein-dev-home name: chat-ai-gateway @@ -160,7 
+176,7 @@ workloads: nodeSelector: hardware: rpi5 images: - - ghcr.io/element-hq/element-call:latest + - ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc - kind: Deployment namespace: comms name: livekit @@ -209,42 +225,6 @@ workloads: nodeSelector: {} images: - nginx:1.27-alpine -- kind: Deployment - namespace: comms - name: othrys-element-element-web - labels: - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/name: element-web - serviceAccountName: othrys-element-element-web - nodeSelector: - hardware: rpi5 - images: - - ghcr.io/element-hq/element-web:v1.12.6 -- kind: Deployment - namespace: comms - name: othrys-synapse-matrix-synapse - labels: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - serviceAccountName: default - nodeSelector: - hardware: rpi5 - images: - - ghcr.io/element-hq/synapse:v1.144.0 -- kind: Deployment - namespace: comms - name: othrys-synapse-redis-master - labels: - app.kubernetes.io/component: master - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - serviceAccountName: othrys-synapse-redis - nodeSelector: {} - images: - - docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 - kind: DaemonSet namespace: crypto name: monero-xmrig @@ -254,7 +234,7 @@ workloads: nodeSelector: node-role.kubernetes.io/worker: 'true' images: - - ghcr.io/tari-project/xmrig:latest + - ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9 - kind: Deployment namespace: crypto name: monero-p2pool @@ -447,6 +427,46 @@ workloads: kubernetes.io/os: linux images: - hashicorp/vault-csi-provider:1.7.0 +- kind: DaemonSet + namespace: logging + name: node-image-gc-rpi4 + labels: + app: node-image-gc-rpi4 + serviceAccountName: node-image-gc-rpi4 + nodeSelector: + hardware: rpi4 + images: + - 
bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: DaemonSet + namespace: logging + name: node-image-prune-rpi5 + labels: + app: node-image-prune-rpi5 + serviceAccountName: node-image-prune-rpi5 + nodeSelector: + hardware: rpi5 + images: + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: DaemonSet + namespace: logging + name: node-log-rotation + labels: + app: node-log-rotation + serviceAccountName: node-log-rotation + nodeSelector: + hardware: rpi5 + images: + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: Deployment + namespace: logging + name: oauth2-proxy-logs + labels: + app: oauth2-proxy-logs + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 - kind: Deployment namespace: longhorn-system name: oauth2-proxy-longhorn @@ -466,7 +486,7 @@ workloads: nodeSelector: mailu.bstein.dev/vip: 'true' images: - - lachlanevenson/k8s-kubectl:latest + - registry.bstein.dev/bstein/kubectl:1.35.0 - kind: Deployment namespace: mailu-mailserver name: mailu-sync-listener @@ -477,30 +497,24 @@ workloads: images: - python:3.11-alpine - kind: DaemonSet - namespace: metallb-system - name: metallb-speaker + namespace: maintenance + name: node-image-sweeper labels: - app.kubernetes.io/component: speaker - app.kubernetes.io/instance: metallb - app.kubernetes.io/name: metallb - serviceAccountName: metallb-speaker + app: node-image-sweeper + serviceAccountName: node-image-sweeper nodeSelector: kubernetes.io/os: linux images: - - quay.io/frrouting/frr:10.4.1 - - quay.io/metallb/speaker:v0.15.3 -- kind: Deployment - namespace: metallb-system - name: metallb-controller + - python:3.12.9-alpine3.20 +- kind: DaemonSet + namespace: maintenance + name: node-nofile labels: - app.kubernetes.io/component: controller - app.kubernetes.io/instance: metallb - 
app.kubernetes.io/name: metallb - serviceAccountName: metallb-controller - nodeSelector: - kubernetes.io/os: linux + app: node-nofile + serviceAccountName: node-nofile + nodeSelector: {} images: - - quay.io/metallb/controller:v0.15.3 + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 - kind: DaemonSet namespace: monitoring name: dcgm-exporter @@ -510,6 +524,16 @@ workloads: nodeSelector: {} images: - registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 +- kind: DaemonSet + namespace: monitoring + name: jetson-tegrastats-exporter + labels: + app: jetson-tegrastats-exporter + serviceAccountName: default + nodeSelector: + jetson: 'true' + images: + - python:3.10-slim - kind: Deployment namespace: monitoring name: postmark-exporter @@ -528,7 +552,7 @@ workloads: nodeSelector: hardware: rpi5 images: - - collabora/code:latest + - collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26 - kind: Deployment namespace: nextcloud name: nextcloud @@ -539,6 +563,46 @@ workloads: hardware: rpi5 images: - nextcloud:29-apache +- kind: Deployment + namespace: outline + name: outline + labels: + app: outline + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - outlinewiki/outline:1.2.0 +- kind: Deployment + namespace: outline + name: outline-redis + labels: + app: outline-redis + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - redis:7.4.1-alpine +- kind: Deployment + namespace: planka + name: planka + labels: + app: planka + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - ghcr.io/plankanban/planka:2.0.0-rc.4 +- kind: StatefulSet + namespace: postgres + name: postgres + labels: + app: postgres + serviceAccountName: postgres-vault + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - postgres:15 - kind: Deployment namespace: sso name: keycloak 
@@ -650,16 +714,6 @@ services: port: 80 targetPort: 8080 protocol: TCP -- namespace: ci-demo - name: ci-demo - type: ClusterIP - selector: - app.kubernetes.io/name: ci-demo - ports: - - name: http - port: 80 - targetPort: http - protocol: TCP - namespace: comms name: coturn type: LoadBalancer @@ -958,64 +1012,6 @@ services: port: 80 targetPort: 80 protocol: TCP -- namespace: comms - name: othrys-element-element-web - type: ClusterIP - selector: - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/name: element-web - ports: - - name: http - port: 80 - targetPort: http - protocol: TCP -- namespace: comms - name: othrys-synapse-matrix-synapse - type: ClusterIP - selector: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - ports: - - name: http - port: 8008 - targetPort: http - protocol: TCP -- namespace: comms - name: othrys-synapse-redis-headless - type: ClusterIP - selector: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - protocol: TCP -- namespace: comms - name: othrys-synapse-redis-master - type: ClusterIP - selector: - app.kubernetes.io/component: master - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - protocol: TCP -- namespace: comms - name: othrys-synapse-replication - type: ClusterIP - selector: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - ports: - - name: replication - port: 9093 - targetPort: replication - protocol: TCP - namespace: crypto name: monerod type: ClusterIP @@ -1143,6 +1139,16 @@ services: port: 443 targetPort: websecure protocol: TCP +- namespace: logging + name: oauth2-proxy-logs + type: ClusterIP + selector: + app: oauth2-proxy-logs + ports: + - name: http + port: 80 + targetPort: 4180 + 
protocol: TCP - namespace: longhorn-system name: oauth2-proxy-longhorn type: ClusterIP @@ -1195,18 +1201,6 @@ services: port: 8080 targetPort: 8080 protocol: TCP -- namespace: metallb-system - name: metallb-webhook-service - type: ClusterIP - selector: - app.kubernetes.io/component: controller - app.kubernetes.io/instance: metallb - app.kubernetes.io/name: metallb - ports: - - name: null - port: 443 - targetPort: 9443 - protocol: TCP - namespace: monitoring name: dcgm-exporter type: ClusterIP @@ -1217,6 +1211,16 @@ services: port: 9400 targetPort: metrics protocol: TCP +- namespace: monitoring + name: jetson-tegrastats-exporter + type: ClusterIP + selector: + app: jetson-tegrastats-exporter + ports: + - name: metrics + port: 9100 + targetPort: metrics + protocol: TCP - namespace: monitoring name: postmark-exporter type: ClusterIP @@ -1247,6 +1251,46 @@ services: port: 80 targetPort: http protocol: TCP +- namespace: outline + name: outline + type: ClusterIP + selector: + app: outline + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: outline + name: outline-redis + type: ClusterIP + selector: + app: outline-redis + ports: + - name: redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: planka + name: planka + type: ClusterIP + selector: + app: planka + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: postgres + name: postgres-service + type: ClusterIP + selector: + app: postgres + ports: + - name: postgres + port: 5432 + targetPort: 5432 + protocol: TCP - namespace: sso name: keycloak type: ClusterIP @@ -1378,7 +1422,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-bstein-dev - source: communication + source: comms - host: bstein.dev path: /.well-known/matrix/server backend: @@ -1389,7 +1433,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-bstein-dev - source: communication + source: comms - host: bstein.dev path: /api backend: @@ -1415,7 +1459,7 @@ 
http_endpoints: via: kind: Ingress name: element-call - source: communication + source: comms - host: chat.ai.bstein.dev path: / backend: @@ -1467,7 +1511,7 @@ http_endpoints: via: kind: Ingress name: livekit-jwt-ingress - source: communication + source: comms - host: kit.live.bstein.dev path: /livekit/sfu backend: @@ -1480,20 +1524,7 @@ http_endpoints: via: kind: Ingress name: livekit-ingress - source: communication -- host: live.bstein.dev - path: / - backend: - namespace: comms - service: othrys-element-element-web - port: 80 - workloads: - - kind: Deployment - name: othrys-element-element-web - via: - kind: Ingress - name: othrys-element-element-web - source: communication + source: comms - host: live.bstein.dev path: /.well-known/matrix/client backend: @@ -1504,7 +1535,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown - source: communication + source: comms - host: live.bstein.dev path: /.well-known/matrix/server backend: @@ -1515,20 +1546,31 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown - source: communication + source: comms - host: live.bstein.dev path: /_matrix backend: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: &id002 - - kind: Deployment - name: othrys-synapse-matrix-synapse + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms +- host: logs.bstein.dev + path: / + backend: + namespace: logging + service: oauth2-proxy-logs + port: http + workloads: + - kind: Deployment + name: oauth2-proxy-logs + via: + kind: Ingress + name: logs + source: logging - host: longhorn.bstein.dev path: / backend: @@ -1559,13 +1601,13 @@ http_endpoints: namespace: comms service: matrix-authentication-service port: 8080 - workloads: &id003 + workloads: &id002 - kind: Deployment name: matrix-authentication-service via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /.well-known/matrix/client backend: @@ -1576,7 
+1618,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-matrix-live - source: communication + source: comms - host: matrix.live.bstein.dev path: /.well-known/matrix/server backend: @@ -1587,86 +1629,86 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-matrix-live - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix backend: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id002 + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/r0/register backend: namespace: comms service: matrix-guest-register port: 8080 - workloads: &id004 + workloads: &id003 - kind: Deployment name: matrix-guest-register via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/login backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/logout backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/refresh backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/register backend: namespace: comms service: matrix-guest-register port: 8080 - workloads: *id004 + workloads: *id003 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_synapse backend: namespace: comms service: 
othrys-synapse-matrix-synapse port: 8008 - workloads: *id002 + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: monero.bstein.dev path: / backend: @@ -1680,6 +1722,19 @@ http_endpoints: kind: Ingress name: monerod source: monerod +- host: notes.bstein.dev + path: / + backend: + namespace: outline + service: outline + port: 80 + workloads: + - kind: Deployment + name: outline + via: + kind: Ingress + name: outline + source: outline - host: office.bstein.dev path: / backend: @@ -1758,6 +1813,19 @@ http_endpoints: kind: Ingress name: jellyfin source: jellyfin +- host: tasks.bstein.dev + path: / + backend: + namespace: planka + service: planka + port: 80 + workloads: + - kind: Deployment + name: planka + via: + kind: Ingress + name: planka + source: planka - host: vault.bstein.dev path: / backend: @@ -1772,15 +1840,28 @@ http_endpoints: name: vaultwarden-ingress source: vaultwarden helmrelease_host_hints: + comms:comms/othrys-element: + - call.live.bstein.dev + - live.bstein.dev + - matrix.live.bstein.dev + comms:comms/othrys-synapse: + - bstein.dev + - kit.live.bstein.dev + - live.bstein.dev + - matrix.live.bstein.dev + - turn.live.bstein.dev gitops-ui:flux-system/weave-gitops: - cd.bstein.dev harbor:harbor/harbor: - registry.bstein.dev + logging:logging/data-prepper: + - registry.bstein.dev mailu:mailu-mailserver/mailu: - bstein.dev - mail.bstein.dev monitoring:monitoring/alertmanager: - alerts.bstein.dev monitoring:monitoring/grafana: + - bstein.dev - metrics.bstein.dev - sso.bstein.dev diff --git a/knowledge/diagrams/atlas-http.mmd b/knowledge/diagrams/atlas-http.mmd index ddd33d8..ab7c362 100644 --- a/knowledge/diagrams/atlas-http.mmd +++ b/knowledge/diagrams/atlas-http.mmd @@ -47,15 +47,14 @@ flowchart LR wl_comms_livekit["comms/livekit (Deployment)"] svc_comms_livekit --> wl_comms_livekit host_live_bstein_dev["live.bstein.dev"] - svc_comms_othrys_element_element_web["comms/othrys-element-element-web 
(Service)"] - host_live_bstein_dev --> svc_comms_othrys_element_element_web - wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"] - svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web host_live_bstein_dev --> svc_comms_matrix_wellknown svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] - svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse + host_logs_bstein_dev["logs.bstein.dev"] + svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"] + host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs + wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"] + svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs host_longhorn_bstein_dev["longhorn.bstein.dev"] svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn @@ -80,6 +79,11 @@ flowchart LR host_monero_bstein_dev --> svc_crypto_monerod wl_crypto_monerod["crypto/monerod (Deployment)"] svc_crypto_monerod --> wl_crypto_monerod + host_notes_bstein_dev["notes.bstein.dev"] + svc_outline_outline["outline/outline (Service)"] + host_notes_bstein_dev --> svc_outline_outline + wl_outline_outline["outline/outline (Deployment)"] + svc_outline_outline --> wl_outline_outline host_office_bstein_dev["office.bstein.dev"] svc_nextcloud_collabora["nextcloud/collabora (Service)"] host_office_bstein_dev --> svc_nextcloud_collabora @@ -110,6 +114,11 @@ flowchart LR host_stream_bstein_dev --> svc_jellyfin_jellyfin wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"] svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin + host_tasks_bstein_dev["tasks.bstein.dev"] + svc_planka_planka["planka/planka (Service)"] + 
host_tasks_bstein_dev --> svc_planka_planka + wl_planka_planka["planka/planka (Deployment)"] + svc_planka_planka --> wl_planka_planka host_vault_bstein_dev["vault.bstein.dev"] svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"] host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service @@ -133,10 +142,7 @@ flowchart LR wl_comms_livekit_token_service svc_comms_livekit wl_comms_livekit - svc_comms_othrys_element_element_web - wl_comms_othrys_element_element_web svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse svc_comms_matrix_authentication_service wl_comms_matrix_authentication_service svc_comms_matrix_guest_register @@ -160,6 +166,10 @@ flowchart LR svc_jenkins_jenkins wl_jenkins_jenkins end + subgraph logging[logging] + svc_logging_oauth2_proxy_logs + wl_logging_oauth2_proxy_logs + end subgraph longhorn_system[longhorn-system] svc_longhorn_system_oauth2_proxy_longhorn wl_longhorn_system_oauth2_proxy_longhorn @@ -173,6 +183,14 @@ flowchart LR svc_nextcloud_collabora wl_nextcloud_collabora end + subgraph outline[outline] + svc_outline_outline + wl_outline_outline + end + subgraph planka[planka] + svc_planka_planka + wl_planka_planka + end subgraph sso[sso] svc_sso_oauth2_proxy wl_sso_oauth2_proxy diff --git a/scripts/dashboards_render_atlas.py b/scripts/dashboards_render_atlas.py index 01fe9c7..0931b48 100644 --- a/scripts/dashboards_render_atlas.py +++ b/scripts/dashboards_render_atlas.py @@ -85,19 +85,17 @@ WORKER_TOTAL = len(WORKER_NODES) CONTROL_SUFFIX = f"/{CONTROL_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}" # Namespaces considered infrastructure (excluded from workload counts) -INFRA_NAMESPACES = [ - "kube-system", - "longhorn-system", - "metallb-system", +INFRA_PATTERNS = [ + "kube-.*", + ".*-system", + "traefik", "monitoring", "logging", "cert-manager", - "flux-system", - "traefik", "maintenance", "postgres", ] -INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$" +INFRA_REGEX = 
f"^({'|'.join(INFRA_PATTERNS)})$" # Namespaces allowed on control plane without counting as workloads CP_ALLOWED_NS = INFRA_REGEX LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]" @@ -319,6 +317,25 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"' NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"' NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] +GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"' +GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}" +GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}" +GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})" +GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})" +GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1" +GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})" +GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})" +GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})" +GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600" +GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600" +GLUE_STALE_WINDOW_SEC = 36 * 3600 +GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})" +GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)" +GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})" +GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})" +GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))" +GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})" +GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})" GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODE_REGEX = "|".join(GPU_NODES) TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" @@ -965,7 +982,7 @@ def build_overview(): 30, "Mail Sent (1d)", 
'max(postmark_outbound_sent{window="1d"})', - {"h": 2, "w": 6, "x": 0, "y": 8}, + {"h": 2, "w": 5, "x": 0, "y": 8}, unit="none", links=link_to("atlas-mail"), ) @@ -976,7 +993,7 @@ def build_overview(): "type": "stat", "title": "Mail Bounces (1d)", "datasource": PROM_DS, - "gridPos": {"h": 2, "w": 6, "x": 12, "y": 8}, + "gridPos": {"h": 2, "w": 5, "x": 10, "y": 8}, "targets": [ { "expr": 'max(postmark_outbound_bounce_rate{window="1d"})', @@ -1022,7 +1039,7 @@ def build_overview(): 32, "Mail Success Rate (1d)", 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', - {"h": 2, "w": 6, "x": 6, "y": 8}, + {"h": 2, "w": 5, "x": 5, "y": 8}, unit="percent", thresholds=mail_success_thresholds, decimals=1, @@ -1034,7 +1051,7 @@ def build_overview(): 33, "Mail Limit Used (30d)", "max(postmark_sending_limit_used_percent)", - {"h": 2, "w": 6, "x": 18, "y": 8}, + {"h": 2, "w": 5, "x": 15, "y": 8}, unit="percent", thresholds=mail_limit_thresholds, decimals=1, @@ -1072,7 +1089,7 @@ def build_overview(): namespace_cpu_share_expr(cpu_scope), {"h": 9, "w": 8, "x": 0, "y": 16}, links=namespace_scope_links("namespace_scope_cpu"), - description="Values are normalized within the selected scope; use panel links to switch scope.", + description="Shares are normalized within the selected filter. Switching scope changes the denominator.", ) ) panels.append( @@ -1082,7 +1099,7 @@ def build_overview(): namespace_gpu_share_expr(gpu_scope), {"h": 9, "w": 8, "x": 8, "y": 16}, links=namespace_scope_links("namespace_scope_gpu"), - description="Values are normalized within the selected scope; use panel links to switch scope.", + description="Shares are normalized within the selected filter. 
Switching scope changes the denominator.", ) ) panels.append( @@ -1092,7 +1109,7 @@ def build_overview(): namespace_ram_share_expr(ram_scope), {"h": 9, "w": 8, "x": 16, "y": 16}, links=namespace_scope_links("namespace_scope_ram"), - description="Values are normalized within the selected scope; use panel links to switch scope.", + description="Shares are normalized within the selected filter. Switching scope changes the denominator.", ) ) @@ -1727,7 +1744,7 @@ def build_storage_dashboard(): stat_panel( 31, "Maintenance Cron Freshness (s)", - 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})', + 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"})', {"h": 4, "w": 12, "x": 12, "y": 44}, unit="s", thresholds={ @@ -2136,6 +2153,98 @@ def build_mail_dashboard(): } +def build_testing_dashboard(): + panels = [] + sort_desc = [{"id": "labelsToFields", "options": {}}, {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}}] + + panels.append( + stat_panel( + 1, + "Glue Jobs Stale (>36h)", + GLUE_STALE_COUNT, + {"h": 4, "w": 6, "x": 0, "y": 0}, + unit="none", + thresholds={ + "mode": "absolute", + "steps": [ + {"color": "green", "value": None}, + {"color": "yellow", "value": 1}, + {"color": "orange", "value": 2}, + {"color": "red", "value": 3}, + ], + }, + ) + ) + panels.append( + table_panel( + 2, + "Glue Jobs Missing Success", + GLUE_MISSING_ACTIVE, + {"h": 4, "w": 6, "x": 6, "y": 0}, + unit="none", + transformations=sort_desc, + instant=True, + ) + ) + panels.append( + table_panel( + 3, + "Glue Jobs Suspended", + GLUE_SUSPENDED, + {"h": 4, "w": 6, "x": 12, "y": 0}, + unit="none", + transformations=sort_desc, + instant=True, + ) + ) + panels.append( + table_panel( + 4, + "Glue Jobs Active Runs", + GLUE_ACTIVE, + {"h": 4, "w": 6, "x": 18, "y": 0}, + unit="none", + transformations=sort_desc, + instant=True, + ) + 
) + panels.append( + table_panel( + 5, + "Glue Jobs Last Success (hours ago)", + GLUE_LAST_SUCCESS_AGE_HOURS, + {"h": 8, "w": 12, "x": 0, "y": 4}, + unit="h", + transformations=sort_desc, + instant=True, + ) + ) + panels.append( + table_panel( + 6, + "Glue Jobs Last Schedule (hours ago)", + GLUE_LAST_SCHEDULE_AGE_HOURS, + {"h": 8, "w": 12, "x": 12, "y": 4}, + unit="h", + transformations=sort_desc, + instant=True, + ) + ) + + return { + "uid": "atlas-testing", + "title": "Atlas Testing", + "folderUid": PRIVATE_FOLDER, + "editable": True, + "panels": panels, + "time": {"from": "now-7d", "to": "now"}, + "annotations": {"list": []}, + "schemaVersion": 39, + "style": "dark", + "tags": ["atlas", "testing"], + } + + def build_gpu_dashboard(): panels = [] gpu_scope = "$namespace_scope_gpu" @@ -2146,7 +2255,7 @@ def build_gpu_dashboard(): namespace_gpu_share_expr(gpu_scope), {"h": 8, "w": 12, "x": 0, "y": 0}, links=namespace_scope_links("namespace_scope_gpu"), - description="Values are normalized within the selected scope; use panel links to switch scope.", + description="Shares are normalized within the selected filter. 
Switching scope changes the denominator.", ) ) panels.append( @@ -2229,6 +2338,10 @@ DASHBOARDS = { "builder": build_mail_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml", }, + "atlas-testing": { + "builder": build_testing_dashboard, + "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml", + }, "atlas-gpu": { "builder": build_gpu_dashboard, "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml", diff --git a/scripts/knowledge_render_atlas.py b/scripts/knowledge_render_atlas.py index 50ac84c..c7f9f26 100644 --- a/scripts/knowledge_render_atlas.py +++ b/scripts/knowledge_render_atlas.py @@ -505,7 +505,9 @@ def main() -> int: diagram_path = out_dir / "diagrams" / "atlas-http.mmd" runbooks_json_path = out_dir / "catalog" / "runbooks.json" + catalog_rel = catalog_path.relative_to(REPO_ROOT).as_posix() catalog_path.write_text( + f"# {catalog_rel}\n" "# Generated by scripts/knowledge_render_atlas.py (do not edit by hand)\n" + yaml.safe_dump(catalog, sort_keys=False), encoding="utf-8", diff --git a/scripts/test_atlas_user_cleanup.py b/scripts/test_atlas_user_cleanup.py index 41ba708..2acf8a7 100755 --- a/scripts/test_atlas_user_cleanup.py +++ b/scripts/test_atlas_user_cleanup.py @@ -7,6 +7,8 @@ test accounts created via the bstein-dev-home onboarding portal. 
Targets (best-effort): - Keycloak users in realm "atlas" - Atlas portal Postgres rows (access_requests + dependent tables) + - Mailu mailboxes created for test users + - Nextcloud Mail accounts created for test users - Vaultwarden users/invites created by the portal Safety: @@ -56,6 +58,19 @@ class VaultwardenUser: status: int +@dataclass(frozen=True) +class MailuUser: + email: str + localpart: str + domain: str + + +@dataclass(frozen=True) +class NextcloudMailAccount: + account_id: str + email: str + + def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str: proc = subprocess.run( cmd, @@ -70,6 +85,19 @@ def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str: return proc.stdout.decode("utf-8", errors="replace") +def _run_capture(cmd: list[str], *, input_bytes: bytes | None = None) -> tuple[int, str, str]: + proc = subprocess.run( + cmd, + input=input_bytes, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + ) + stdout = proc.stdout.decode("utf-8", errors="replace") + stderr = proc.stderr.decode("utf-8", errors="replace") + return proc.returncode, stdout, stderr + + def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str: raw_b64 = _run( [ @@ -110,6 +138,21 @@ def _kubectl_first_pod(namespace: str) -> str: return pod_name +def _kubectl_exec(namespace: str, target: str, cmd: list[str]) -> tuple[int, str, str]: + return _run_capture( + [ + "kubectl", + "-n", + namespace, + "exec", + "-i", + target, + "--", + *cmd, + ] + ) + + def _validate_prefixes(prefixes: list[str]) -> list[str]: cleaned: list[str] = [] for prefix in prefixes: @@ -187,6 +230,62 @@ def _keycloak_delete_user(server: str, realm: str, token: str, user_id: str) -> raise +def _sql_quote(value: str) -> str: + return "'" + value.replace("'", "''") + "'" + + +def _psql_exec(db_name: str, sql: str, *, user: str = "postgres") -> str: + postgres_pod = _kubectl_first_pod("postgres") + return _run( + [ + "kubectl", + "-n", + "postgres", + "exec", + 
"-i", + postgres_pod, + "--", + "psql", + "-U", + user, + "-d", + db_name, + "-c", + sql, + ] + ) + + +def _psql_tsv(db_name: str, sql: str, *, user: str = "postgres") -> list[list[str]]: + postgres_pod = _kubectl_first_pod("postgres") + out = _run( + [ + "kubectl", + "-n", + "postgres", + "exec", + "-i", + postgres_pod, + "--", + "psql", + "-U", + user, + "-d", + db_name, + "-At", + "-F", + "\t", + "-c", + sql, + ] + ) + rows: list[list[str]] = [] + for line in out.splitlines(): + parts = line.split("\t") + rows.append(parts) + return rows + + def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]: postgres_pod = _kubectl_first_pod("postgres") out = _run( @@ -256,6 +355,89 @@ def _portal_delete_requests(portal_db_url: str, prefixes: list[str]) -> int: return int(match.group(1)) if match else 0 +def _mailu_list_users(prefixes: list[str], domain: str, db_name: str, protected: set[str]) -> list[MailuUser]: + if not prefixes or not domain: + return [] + clauses = " OR ".join([f"localpart LIKE '{p}%'" for p in prefixes]) + sql = ( + 'SELECT email, localpart, domain_name ' + 'FROM "user" ' + f"WHERE domain_name = {_sql_quote(domain)} AND ({clauses}) " + "ORDER BY email;" + ) + rows = _psql_tsv(db_name, sql) + users: list[MailuUser] = [] + for row in rows: + if len(row) < 3: + continue + email = row[0].strip() + if not email or email in protected: + continue + users.append(MailuUser(email=email, localpart=row[1].strip(), domain=row[2].strip())) + return users + + +def _mailu_delete_users(db_name: str, emails: list[str]) -> int: + if not emails: + return 0 + email_list = ",".join(_sql_quote(e) for e in emails) + sql = f'DELETE FROM "user" WHERE email IN ({email_list});' + out = _psql_exec(db_name, sql) + match = re.search(r"DELETE\\s+(\\d+)", out) + return int(match.group(1)) if match else 0 + + +_NEXTCLOUD_ACCOUNT_RE = re.compile(r"^Account\\s+(\\d+):") +_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+") + + +def _nextcloud_exec(cmd: list[str]) -> 
tuple[int, str, str]: + namespace = os.getenv("NEXTCLOUD_NAMESPACE", "nextcloud").strip() or "nextcloud" + target = os.getenv("NEXTCLOUD_EXEC_TARGET", "deploy/nextcloud").strip() or "deploy/nextcloud" + return _kubectl_exec(namespace, target, cmd) + + +def _parse_nextcloud_mail_accounts(export_output: str) -> list[NextcloudMailAccount]: + accounts: list[NextcloudMailAccount] = [] + current_id = "" + for line in export_output.splitlines(): + line = line.strip() + if not line: + continue + match = _NEXTCLOUD_ACCOUNT_RE.match(line) + if match: + current_id = match.group(1) + continue + if not current_id or "@" not in line: + continue + email_match = _EMAIL_RE.search(line) + if not email_match: + continue + accounts.append(NextcloudMailAccount(account_id=current_id, email=email_match.group(0))) + current_id = "" + return accounts + + +def _nextcloud_list_mail_accounts(username: str) -> list[NextcloudMailAccount]: + occ_path = os.getenv("NEXTCLOUD_OCC_PATH", "/var/www/html/occ").strip() or "/var/www/html/occ" + rc, out, err = _nextcloud_exec(["php", occ_path, "mail:account:export", username]) + if rc != 0: + message = (err or out).strip() + lowered = message.lower() + if any(token in lowered for token in ("not found", "does not exist", "no such user", "unknown user")): + return [] + raise RuntimeError(f"nextcloud mail export failed for {username}: {message}") + return _parse_nextcloud_mail_accounts(out) + + +def _nextcloud_delete_mail_account(account_id: str) -> None: + occ_path = os.getenv("NEXTCLOUD_OCC_PATH", "/var/www/html/occ").strip() or "/var/www/html/occ" + rc, out, err = _nextcloud_exec(["php", occ_path, "mail:account:delete", "-q", account_id]) + if rc != 0: + message = (err or out).strip() + raise RuntimeError(f"nextcloud mail delete failed for account {account_id}: {message}") + + def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str: data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8") req = 
urllib.request.Request(f"{base_url}/admin", data=data, method="POST") @@ -356,6 +538,8 @@ def main() -> int: ), ) parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.") + parser.add_argument("--skip-mailu", action="store_true", help="Skip Mailu mailbox cleanup.") + parser.add_argument("--skip-nextcloud-mail", action="store_true", help="Skip Nextcloud Mail account cleanup.") parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.") parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.") parser.add_argument( @@ -364,6 +548,18 @@ def main() -> int: default=[], help="Keycloak usernames that must never be deleted (repeatable).", ) + parser.add_argument( + "--protect-mailu-email", + action="append", + default=[], + help="Mailu emails that must never be deleted (repeatable).", + ) + parser.add_argument( + "--protect-nextcloud-username", + action="append", + default=[], + help="Nextcloud usernames that must never be touched (repeatable).", + ) parser.add_argument( "--protect-vaultwarden-email", action="append", @@ -376,7 +572,11 @@ def main() -> int: apply = bool(args.apply) expected_confirm = ",".join(prefixes) protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]} + protected_mailu = {e.strip() for e in args.protect_mailu_email if e.strip()} + protected_nextcloud = {u.strip() for u in args.protect_nextcloud_username if u.strip()} protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()} + mailu_domain = os.getenv("MAILU_DOMAIN", "bstein.dev").strip() or "bstein.dev" + mailu_db_name = os.getenv("MAILU_DB_NAME", "mailu").strip() or "mailu" if apply and args.confirm != expected_confirm: raise SystemExit( @@ -388,23 +588,29 @@ def main() -> int: print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)") if protected_keycloak: print("protected keycloak 
usernames:", ", ".join(sorted(protected_keycloak))) + if protected_mailu: + print("protected mailu emails:", ", ".join(sorted(protected_mailu))) + if protected_nextcloud: + print("protected nextcloud usernames:", ", ".join(sorted(protected_nextcloud))) if protected_vaultwarden: print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden))) print() + portal_requests: list[PortalRequestRow] = [] if not args.skip_portal_db: portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL") - requests = _portal_list_requests(portal_db_url, prefixes) - print(f"Portal DB: {len(requests)} access_requests matched") - for row in requests[:50]: + portal_requests = _portal_list_requests(portal_db_url, prefixes) + print(f"Portal DB: {len(portal_requests)} access_requests matched") + for row in portal_requests[:50]: print(f" {row.request_code}\t{row.status}\t{row.username}") - if len(requests) > 50: - print(f" ... and {len(requests) - 50} more") - if apply and requests: + if len(portal_requests) > 50: + print(f" ... 
and {len(portal_requests) - 50} more") + if apply and portal_requests: deleted = _portal_delete_requests(portal_db_url, prefixes) print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).") print() + keycloak_users: list[KeycloakUser] = [] if not args.skip_keycloak: kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/") kc_realm = os.getenv("KEYCLOAK_REALM", "atlas") @@ -421,18 +627,63 @@ def main() -> int: if user.username in protected_keycloak: continue found[user.user_id] = user - users = list(found.values()) - users.sort(key=lambda u: u.username) - print(f"Keycloak: {len(users)} users matched") - for user in users[:50]: + keycloak_users = list(found.values()) + keycloak_users.sort(key=lambda u: u.username) + print(f"Keycloak: {len(keycloak_users)} users matched") + for user in keycloak_users[:50]: email = user.email or "-" print(f" {user.username}\t{email}\t{user.user_id}") - if len(users) > 50: - print(f" ... and {len(users) - 50} more") - if apply and users: - for user in users: + if len(keycloak_users) > 50: + print(f" ... and {len(keycloak_users) - 50} more") + if apply and keycloak_users: + for user in keycloak_users: _keycloak_delete_user(kc_server, kc_realm, token, user.user_id) - print(f"Keycloak: deleted {len(users)} users.") + print(f"Keycloak: deleted {len(keycloak_users)} users.") + print() + + if not args.skip_mailu: + mailu_users = _mailu_list_users(prefixes, mailu_domain, mailu_db_name, protected_mailu) + print(f"Mailu: {len(mailu_users)} mailboxes matched (domain={mailu_domain})") + for user in mailu_users[:50]: + print(f" {user.email}\t{user.localpart}\t{user.domain}") + if len(mailu_users) > 50: + print(f" ... 
and {len(mailu_users) - 50} more") + if apply and mailu_users: + deleted = _mailu_delete_users(mailu_db_name, [u.email for u in mailu_users]) + print(f"Mailu: deleted {deleted} mailboxes.") + print() + + if not args.skip_nextcloud_mail: + nextcloud_usernames = {row.username for row in portal_requests if row.username} + nextcloud_usernames.update({u.username for u in keycloak_users if u.username}) + nextcloud_usernames = {u for u in nextcloud_usernames if _starts_with_any(u, prefixes)} + nextcloud_usernames = {u for u in nextcloud_usernames if u not in protected_nextcloud} + + matches: list[tuple[str, NextcloudMailAccount]] = [] + for username in sorted(nextcloud_usernames): + accounts = _nextcloud_list_mail_accounts(username) + for account in accounts: + email = account.email.strip() + if not email: + continue + if not email.lower().endswith(f"@{mailu_domain.lower()}"): + continue + localpart = email.split("@", 1)[0] + if not _starts_with_any(localpart, prefixes): + continue + if email in protected_mailu: + continue + matches.append((username, account)) + + print(f"Nextcloud Mail: {len(matches)} accounts matched") + for username, account in matches[:50]: + print(f" {username}\t{account.account_id}\t{account.email}") + if len(matches) > 50: + print(f" ... 
and {len(matches) - 50} more") + if apply and matches: + for _, account in matches: + _nextcloud_delete_mail_account(account.account_id) + print(f"Nextcloud Mail: deleted {len(matches)} accounts.") print() if not args.skip_vaultwarden: diff --git a/scripts/tests/test_mailu_sync.py b/scripts/tests/test_mailu_sync.py index 49bd2e4..d5f9487 100644 --- a/scripts/tests/test_mailu_sync.py +++ b/scripts/tests/test_mailu_sync.py @@ -55,11 +55,11 @@ class _FakeResponse: class _FakeSession: - def __init__(self, put_resp, get_resp): + def __init__(self, put_resp, get_resps): self.put_resp = put_resp - self.get_resp = get_resp + self.get_resps = list(get_resps) self.put_called = False - self.get_called = False + self.get_calls = 0 def post(self, *args, **kwargs): return _FakeResponse({"access_token": "dummy"}) @@ -69,22 +69,26 @@ class _FakeSession: return self.put_resp def get(self, *args, **kwargs): - self.get_called = True - return self.get_resp + self.get_calls += 1 + if self.get_resps: + return self.get_resps.pop(0) + return _FakeResponse({}) def test_kc_update_attributes_succeeds(monkeypatch): sync = load_sync_module(monkeypatch) + current_resp = _FakeResponse({"attributes": {}}) ok_resp = _FakeResponse({"attributes": {"mailu_app_password": ["abc"]}}) - sync.SESSION = _FakeSession(_FakeResponse({}), ok_resp) + sync.SESSION = _FakeSession(_FakeResponse({}), [current_resp, ok_resp]) sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"}) - assert sync.SESSION.put_called and sync.SESSION.get_called + assert sync.SESSION.put_called and sync.SESSION.get_calls == 2 def test_kc_update_attributes_raises_without_attribute(monkeypatch): sync = load_sync_module(monkeypatch) + current_resp = _FakeResponse({"attributes": {}}) missing_attr_resp = _FakeResponse({"attributes": {}}, status=200) - sync.SESSION = _FakeSession(_FakeResponse({}), missing_attr_resp) + sync.SESSION = _FakeSession(_FakeResponse({}), [current_resp, missing_attr_resp]) 
with pytest.raises(Exception): sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"}) @@ -144,9 +148,25 @@ def test_main_generates_password_and_upserts(monkeypatch): sync = load_sync_module(monkeypatch) monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}") users = [ - {"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}}, - {"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}}, - {"id": "u3", "username": "user3", "email": "user3@other.com", "attributes": {}}, + { + "id": "u1", + "username": "user1", + "email": "user1@example.com", + "attributes": {"mailu_enabled": ["true"]}, + }, + { + "id": "u2", + "username": "user2", + "email": "user2@example.com", + "attributes": {"mailu_app_password": ["keepme"], "mailu_enabled": ["true"]}, + }, + { + "id": "u3", + "username": "user3", + "email": "user3@example.com", + "attributes": {"mailu_email": ["user3@example.com"]}, + }, + {"id": "u4", "username": "user4", "email": "user4@other.com", "attributes": {}}, ] updated = [] @@ -185,6 +205,6 @@ def test_main_generates_password_and_upserts(monkeypatch): sync.main() - # Always backfill mailu_email, even if Keycloak recovery email is external. + # Only mail-enabled users (or legacy users with a mailbox) are synced and backfilled. 
assert len(updated) == 3 assert conns and len(conns[0]._cursor.executions) == 3 diff --git a/services/ai-llm/deployment.yaml b/services/ai-llm/deployment.yaml index b6e6701..fa35440 100644 --- a/services/ai-llm/deployment.yaml +++ b/services/ai-llm/deployment.yaml @@ -42,7 +42,7 @@ spec: claimName: ollama-models initContainers: - name: warm-model - image: ollama/ollama:latest + image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d env: - name: OLLAMA_HOST value: 0.0.0.0 @@ -75,7 +75,7 @@ spec: nvidia.com/gpu.shared: 1 containers: - name: ollama - image: ollama/ollama:latest + image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d imagePullPolicy: IfNotPresent ports: - name: http diff --git a/services/bstein-dev-home/backend-deployment.yaml b/services/bstein-dev-home/backend-deployment.yaml index 2e92443..376622c 100644 --- a/services/bstein-dev-home/backend-deployment.yaml +++ b/services/bstein-dev-home/backend-deployment.yaml @@ -14,6 +14,34 @@ spec: metadata: labels: app: bstein-dev-home-backend + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "bstein-dev-home" + vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db" + vault.hashicorp.com/agent-inject-template-portal-env.sh: | + {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} + export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} + export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }} + export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}" + export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ 
.Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export SMTP_HOST="mailu-front.mailu-mailserver.svc.cluster.local" + export SMTP_PORT="587" + export SMTP_STARTTLS="true" + export SMTP_USE_TLS="false" + export SMTP_USERNAME="no-reply-portal@bstein.dev" + export SMTP_PASSWORD="{{ .Data.data.password }}" + export SMTP_FROM="no-reply-portal@bstein.dev" + {{ end }} spec: automountServiceAccountToken: true serviceAccountName: bstein-dev-home @@ -21,20 +49,16 @@ spec: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" imagePullSecrets: - - name: harbor-bstein-robot + - name: harbor-regcred containers: - name: backend - image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95 imagePullPolicy: Always - command: ["gunicorn"] + command: ["/bin/sh", "-c"] args: - - -b - - 0.0.0.0:8080 - - --workers - - "2" - - --timeout - - "180" - - app:app + - >- + . 
/vault/secrets/portal-env.sh + && exec gunicorn -b 0.0.0.0:8080 --workers 2 --timeout 180 app:app env: - name: AI_CHAT_API value: http://ollama.ai.svc.cluster.local:11434 @@ -67,18 +91,8 @@ spec: value: atlas - name: KEYCLOAK_ADMIN_CLIENT_ID value: bstein-dev-home-admin - - name: KEYCLOAK_ADMIN_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: bstein-dev-home-keycloak-admin - key: client_secret - name: ACCOUNT_ALLOWED_GROUPS value: "" - - name: PORTAL_DATABASE_URL - valueFrom: - secretKeyRef: - name: atlas-portal-db - key: PORTAL_DATABASE_URL - name: HTTP_CHECK_TIMEOUT_SEC value: "2" - name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT @@ -91,6 +105,22 @@ spec: value: "60" - name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST value: robotuser@bstein.dev + - name: WGER_NAMESPACE + value: health + - name: WGER_USER_SYNC_CRONJOB + value: wger-user-sync + - name: WGER_USER_SYNC_WAIT_TIMEOUT_SEC + value: "90" + - name: FIREFLY_NAMESPACE + value: finance + - name: FIREFLY_USER_SYNC_CRONJOB + value: firefly-user-sync + - name: FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC + value: "90" + - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC + value: "900" + - name: VAULTWARDEN_ADMIN_RATE_LIMIT_BACKOFF_SEC + value: "60" ports: - name: http containerPort: 8080 diff --git a/services/bstein-dev-home/backend-service.yaml b/services/bstein-dev-home/backend-service.yaml index 75ec16c..76be476 100644 --- a/services/bstein-dev-home/backend-service.yaml +++ b/services/bstein-dev-home/backend-service.yaml @@ -1,3 +1,4 @@ +# services/bstein-dev-home/backend-service.yaml apiVersion: v1 kind: Service metadata: diff --git a/services/bstein-dev-home/chat-ai-gateway-deployment.yaml b/services/bstein-dev-home/chat-ai-gateway-deployment.yaml index 7ac6504..40d74fe 100644 --- a/services/bstein-dev-home/chat-ai-gateway-deployment.yaml +++ b/services/bstein-dev-home/chat-ai-gateway-deployment.yaml @@ -14,7 +14,27 @@ spec: metadata: labels: app: chat-ai-gateway + annotations: + vault.hashicorp.com/agent-inject: "true" + 
vault.hashicorp.com/role: "bstein-dev-home" + vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db" + vault.hashicorp.com/agent-inject-template-portal-env.sh: | + {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} + export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} + export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }} + export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}" + export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} spec: + serviceAccountName: bstein-dev-home nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" @@ -23,20 +43,10 @@ spec: image: python:3.11-slim command: ["/bin/sh","-c"] args: - - python /app/gateway.py + - . 
/vault/secrets/portal-env.sh && exec python /app/gateway.py env: - name: UPSTREAM_URL value: http://bstein-dev-home-backend/api/chat - - name: CHAT_KEY_MATRIX - valueFrom: - secretKeyRef: - name: chat-ai-keys-runtime - key: matrix - - name: CHAT_KEY_HOMEPAGE - valueFrom: - secretKeyRef: - name: chat-ai-keys-runtime - key: homepage ports: - name: http containerPort: 8080 diff --git a/services/bstein-dev-home/frontend-deployment.yaml b/services/bstein-dev-home/frontend-deployment.yaml index 3092edb..ef26e73 100644 --- a/services/bstein-dev-home/frontend-deployment.yaml +++ b/services/bstein-dev-home/frontend-deployment.yaml @@ -19,10 +19,10 @@ spec: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" imagePullSecrets: - - name: harbor-bstein-robot + - name: harbor-regcred containers: - name: frontend - image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-95 imagePullPolicy: Always ports: - name: http diff --git a/services/bstein-dev-home/frontend-service.yaml b/services/bstein-dev-home/frontend-service.yaml index 8540580..ee1df10 100644 --- a/services/bstein-dev-home/frontend-service.yaml +++ b/services/bstein-dev-home/frontend-service.yaml @@ -1,3 +1,4 @@ +# services/bstein-dev-home/frontend-service.yaml apiVersion: v1 kind: Service metadata: diff --git a/services/bstein-dev-home/kustomization.yaml b/services/bstein-dev-home/kustomization.yaml index 81220e8..f9d3c87 100644 --- a/services/bstein-dev-home/kustomization.yaml +++ b/services/bstein-dev-home/kustomization.yaml @@ -6,7 +6,9 @@ resources: - namespace.yaml - image.yaml - rbac.yaml - - portal-e2e-client-secret-sync-rbac.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml + - vault-sync-deployment.yaml - chat-ai-gateway-deployment.yaml - chat-ai-gateway-service.yaml - frontend-deployment.yaml @@ -18,9 +20,9 @@ resources: - ingress.yaml images: 
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend - newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} + newTag: 0.1.1-102 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} - name: registry.bstein.dev/bstein/bstein-dev-home-backend - newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + newTag: 0.1.1-103 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} configMapGenerator: - name: chat-ai-gateway namespace: bstein-dev-home diff --git a/services/bstein-dev-home/namespace.yaml b/services/bstein-dev-home/namespace.yaml index ae77d71..a6a7c25 100644 --- a/services/bstein-dev-home/namespace.yaml +++ b/services/bstein-dev-home/namespace.yaml @@ -1,3 +1,4 @@ +# services/bstein-dev-home/namespace.yaml apiVersion: v1 kind: Namespace metadata: diff --git a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml index 3170f86..f22272e 100644 --- a/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml +++ b/services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml @@ -2,13 +2,49 @@ apiVersion: batch/v1 kind: Job metadata: - name: portal-onboarding-e2e-test-11 + name: portal-onboarding-e2e-test-19 namespace: bstein-dev-home spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "bstein-dev-home" + vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db" + vault.hashicorp.com/agent-inject-template-portal-env.sh: | + {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} + export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} + export 
KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }} + export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}" + export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: bstein-dev-home containers: - name: test image: python:3.11-slim @@ -21,21 +57,6 @@ spec: value: atlas - name: KEYCLOAK_ADMIN_CLIENT_ID value: bstein-dev-home-admin - - name: KEYCLOAK_ADMIN_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: bstein-dev-home-keycloak-admin - key: client_secret - - name: PORTAL_E2E_CLIENT_ID - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_id - - name: PORTAL_E2E_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_secret - name: PORTAL_TARGET_CLIENT_ID value: bstein-dev-home - name: E2E_PORTAL_ADMIN_USERNAME @@ -53,7 +74,8 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu + . 
/vault/secrets/portal-env.sh python /scripts/test_portal_onboarding_flow.py volumeMounts: - name: tests diff --git a/services/bstein-dev-home/rbac.yaml b/services/bstein-dev-home/rbac.yaml index f97ed24..5ff26eb 100644 --- a/services/bstein-dev-home/rbac.yaml +++ b/services/bstein-dev-home/rbac.yaml @@ -106,3 +106,65 @@ subjects: - kind: ServiceAccount name: bstein-dev-home namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-wger-user-sync + namespace: health +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["wger-user-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-wger-user-sync + namespace: health +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-wger-user-sync +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-firefly-user-sync + namespace: finance +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["firefly-user-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-firefly-user-sync + namespace: finance +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-firefly-user-sync +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home diff --git a/services/bstein-dev-home/scripts/test_portal_onboarding_flow.py 
b/services/bstein-dev-home/scripts/test_portal_onboarding_flow.py index 9c5124a..2903216 100644 --- a/services/bstein-dev-home/scripts/test_portal_onboarding_flow.py +++ b/services/bstein-dev-home/scripts/test_portal_onboarding_flow.py @@ -65,6 +65,23 @@ def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int = raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") +def _wait_for_portal_ready(base_url: str, timeout_s: int = 60) -> None: + health_url = f"{base_url.rstrip('/')}/api/healthz" + deadline_at = time.monotonic() + timeout_s + last_error = None + while time.monotonic() < deadline_at: + try: + req = urllib.request.Request(health_url, method="GET") + with urllib.request.urlopen(req, timeout=10) as resp: + if resp.status == 200: + return + except Exception as exc: + last_error = str(exc) + time.sleep(2) + suffix = f" (last_error={last_error})" if last_error else "" + raise SystemExit(f"portal health check timed out{suffix}") + + def _request_json( method: str, url: str, @@ -235,6 +252,7 @@ def _imap_wait_for_verify_token( def main() -> int: portal_base = _env("PORTAL_BASE_URL").rstrip("/") + portal_ready_timeout = int(os.environ.get("E2E_PORTAL_READY_TIMEOUT_SECONDS", "60")) keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/") realm = _env("KEYCLOAK_REALM", "atlas") @@ -249,7 +267,7 @@ def main() -> int: if not contact_email: raise SystemExit("E2E_CONTACT_EMAIL must not be empty") - imap_host = os.environ.get("E2E_IMAP_HOST", "mailu-front.mailu-mailserver.svc.cluster.local").strip() + imap_host = os.environ.get("E2E_IMAP_HOST", "mail.bstein.dev").strip() imap_port = int(os.environ.get("E2E_IMAP_PORT", "993")) imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip() imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90")) @@ -274,6 +292,8 @@ def main() -> int: if not mailu_password: raise SystemExit(f"Keycloak user {imap_keycloak_username!r} missing mailu_app_password attribute") + 
_wait_for_portal_ready(portal_base, timeout_s=portal_ready_timeout) + username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user") now = int(time.time()) username = f"{username_prefix}-{now}" @@ -336,6 +356,8 @@ def main() -> int: except SystemExit as exc: raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}") + _wait_for_portal_ready(portal_base, timeout_s=portal_ready_timeout) + approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve" approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180")) approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3")) @@ -348,6 +370,10 @@ def main() -> int: break except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc: approve_error = str(exc) + try: + _wait_for_portal_ready(portal_base, timeout_s=min(30, portal_ready_timeout)) + except SystemExit: + pass if attempt == approve_attempts: break time.sleep(3) diff --git a/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py b/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py index d259b31..cb4f9c8 100644 --- a/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py +++ b/services/bstein-dev-home/scripts/vaultwarden_cred_sync.py @@ -2,8 +2,10 @@ from __future__ import annotations +import os import sys import time +from datetime import datetime, timezone from typing import Any, Iterable import httpx @@ -16,6 +18,8 @@ from atlas_portal.vaultwarden import invite_user VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email" VAULTWARDEN_STATUS_ATTR = "vaultwarden_status" VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at" +VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800")) +VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2")) def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: @@ -26,14 +30,22 @@ def _iter_keycloak_users(page_size: int = 
200) -> Iterable[dict[str, Any]]: url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" first = 0 while True: - headers = client.headers() + headers = _headers_with_retry(client) # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a # brief representation which may omit these. params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"} - with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: - resp = http.get(url, params=params, headers=headers) - resp.raise_for_status() - payload = resp.json() + payload = None + for attempt in range(1, 6): + try: + with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: + resp = http.get(url, params=params, headers=headers) + resp.raise_for_status() + payload = resp.json() + break + except httpx.HTTPError as exc: + if attempt == 5: + raise + time.sleep(attempt * 2) if not isinstance(payload, list) or not payload: return @@ -47,6 +59,19 @@ def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: first += page_size +def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]: + last_exc: Exception | None = None + for attempt in range(1, attempts + 1): + try: + return client.headers() + except Exception as exc: + last_exc = exc + time.sleep(attempt * 2) + if last_exc: + raise last_exc + raise RuntimeError("failed to fetch keycloak headers") + + def _extract_attr(attrs: Any, key: str) -> str: if not isinstance(attrs, dict): return "" @@ -61,6 +86,21 @@ def _extract_attr(attrs: Any, key: str) -> str: return "" +def _parse_synced_at(value: str) -> float | None: + value = (value or "").strip() + if not value: + return None + for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"): + try: + parsed = datetime.strptime(value, fmt) + if parsed.tzinfo is None: + parsed = parsed.replace(tzinfo=timezone.utc) + return parsed.timestamp() + except ValueError: + continue + return None + + def 
_vaultwarden_email_for_user(user: dict[str, Any]) -> str: username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" username = username.strip() @@ -108,6 +148,7 @@ def main() -> int: created = 0 skipped = 0 failures = 0 + consecutive_failures = 0 for user in _iter_keycloak_users(): username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" @@ -137,6 +178,11 @@ def main() -> int: current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR) current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR) + current_synced_ts = _parse_synced_at(current_synced_at) + if current_status in {"rate_limited", "error"} and current_synced_ts: + if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC: + skipped += 1 + continue email = _vaultwarden_email_for_user(full_user) if not email: print(f"skip {username}: missing email", file=sys.stderr) @@ -167,6 +213,7 @@ def main() -> int: result = invite_user(email) if result.ok: created += 1 + consecutive_failures = 0 print(f"ok {username}: {result.status}") try: _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) @@ -175,12 +222,17 @@ def main() -> int: pass else: failures += 1 + if result.status in {"rate_limited", "error"}: + consecutive_failures += 1 print(f"err {username}: {result.status} {result.detail}", file=sys.stderr) try: _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) except Exception: pass + if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT: + print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr) + break print( f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}", diff --git a/services/bstein-dev-home/secretproviderclass.yaml b/services/bstein-dev-home/secretproviderclass.yaml 
new file mode 100644 index 0000000..f330fe6 --- /dev/null +++ b/services/bstein-dev-home/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/bstein-dev-home/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: bstein-dev-home-vault + namespace: bstein-dev-home +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "bstein-dev-home" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/bstein-dev-home" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/bstein-dev-home/vault-serviceaccount.yaml b/services/bstein-dev-home/vault-serviceaccount.yaml new file mode 100644 index 0000000..d3ea79a --- /dev/null +++ b/services/bstein-dev-home/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/bstein-dev-home/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: bstein-dev-home-vault-sync + namespace: bstein-dev-home diff --git a/services/bstein-dev-home/vault-sync-deployment.yaml b/services/bstein-dev-home/vault-sync-deployment.yaml new file mode 100644 index 0000000..ad50f1e --- /dev/null +++ b/services/bstein-dev-home/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/bstein-dev-home/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: bstein-dev-home-vault-sync + namespace: bstein-dev-home +spec: + replicas: 1 + selector: + matchLabels: + app: bstein-dev-home-vault-sync + template: + metadata: + labels: + app: bstein-dev-home-vault-sync + spec: + serviceAccountName: bstein-dev-home-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + 
readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: bstein-dev-home-vault diff --git a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml index 5e7c779..29141fe 100644 --- a/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml +++ b/services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: vaultwarden-cred-sync namespace: bstein-dev-home + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "*/15 * * * *" concurrencyPolicy: Forbid @@ -13,6 +15,27 @@ spec: spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "bstein-dev-home" + vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db" + vault.hashicorp.com/agent-inject-template-portal-env.sh: | + {{ with secret "kv/data/atlas/portal/atlas-portal-db" }} + export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} + export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }} + export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}" + export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} spec: serviceAccountName: bstein-dev-home restartPolicy: Never @@ -20,14 +43,16 @@ spec: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" imagePullSecrets: - - name: harbor-bstein-robot + - name: harbor-regcred containers: - name: sync - image: 
registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} + image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95 imagePullPolicy: Always - command: - - python - - /scripts/vaultwarden_cred_sync.py + command: ["/bin/sh", "-c"] + args: + - >- + . /vault/secrets/portal-env.sh + && exec python /scripts/vaultwarden_cred_sync.py env: - name: PYTHONPATH value: /app @@ -41,13 +66,14 @@ spec: value: atlas - name: KEYCLOAK_ADMIN_CLIENT_ID value: bstein-dev-home-admin - - name: KEYCLOAK_ADMIN_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: bstein-dev-home-keycloak-admin - key: client_secret - name: HTTP_CHECK_TIMEOUT_SEC value: "20" + - name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC + value: "900" + - name: VAULTWARDEN_RETRY_COOLDOWN_SEC + value: "1800" + - name: VAULTWARDEN_FAILURE_BAILOUT + value: "2" volumeMounts: - name: vaultwarden-cred-sync-script mountPath: /scripts diff --git a/services/comms/atlasbot-deployment.yaml b/services/comms/atlasbot-deployment.yaml index 4d8bfc7..4618053 100644 --- a/services/comms/atlasbot-deployment.yaml +++ b/services/comms/atlasbot-deployment.yaml @@ -16,7 +16,42 @@ spec: labels: app: atlasbot annotations: - checksum/atlasbot-configmap: manual-atlasbot-3 + checksum/atlasbot-configmap: manual-atlasbot-4 + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: 
"kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret 
"kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: serviceAccountName: atlasbot nodeSelector: @@ -27,7 +62,8 @@ spec: command: ["/bin/sh","-c"] args: - | - python /app/bot.py + . /vault/scripts/comms_vault_env.sh + exec python /app/bot.py env: - name: MATRIX_BASE value: http://othrys-synapse-matrix-synapse:8008 @@ -39,16 +75,6 @@ spec: value: http://victoria-metrics-single-server.monitoring.svc.cluster.local:8428 - name: BOT_USER value: atlasbot - - name: BOT_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: bot-password - - name: CHAT_API_KEY - valueFrom: - secretKeyRef: - name: chat-ai-keys-runtime - key: matrix - name: OLLAMA_URL value: https://chat.ai.bstein.dev/ - name: OLLAMA_MODEL @@ -67,6 +93,9 @@ spec: - name: kb mountPath: /kb readOnly: true + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true volumes: - name: code configMap: @@ -85,3 +114,7 @@ spec: path: catalog/runbooks.json - key: atlas-http.mmd path: diagrams/atlas-http.mmd + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/bstein-force-leave-job.yaml b/services/comms/bstein-force-leave-job.yaml index 956330b..0286f8c 100644 --- a/services/comms/bstein-force-leave-job.yaml +++ b/services/comms/bstein-force-leave-job.yaml @@ -2,32 +2,46 @@ apiVersion: batch/v1 kind: Job metadata: - name: bstein-leave-rooms-6 + name: bstein-leave-rooms-12 namespace: comms spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + 
vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: comms-vault volumes: - - name: mas-admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret containers: - name: leave image: python:3.11-slim volumeMounts: - - name: mas-admin-client - mountPath: /etc/mas-admin-client - readOnly: true env: - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas-admin-client/client_secret + value: /vault/secrets/mas-admin-secret - name: MAS_TOKEN_URL value: http://matrix-authentication-service:8080/oauth2/token - name: MAS_ADMIN_API_BASE diff --git a/services/comms/comms-secrets-ensure-job.yaml b/services/comms/comms-secrets-ensure-job.yaml index 877649b..b71dd40 100644 --- a/services/comms/comms-secrets-ensure-job.yaml +++ b/services/comms/comms-secrets-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: comms-secrets-ensure-1 + name: comms-secrets-ensure-6 namespace: comms spec: backoffLimit: 1 @@ -11,9 +11,23 @@ spec: spec: serviceAccountName: comms-secrets-ensure restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 
100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] containers: - name: ensure - image: bitnami/kubectl:latest + image: registry.bstein.dev/bstein/kubectl:1.35.0 command: ["/bin/sh", "-c"] args: - | @@ -25,68 +39,52 @@ spec: head -c 32 /dev/urandom | base64 | tr -d '\n' | tr '+/' '-_' | tr -d '=' } - get_secret_value() { - ns="$1" - name="$2" - key="$3" - kubectl -n "${ns}" get secret "${name}" -o "jsonpath={.data.${key}}" 2>/dev/null | base64 -d 2>/dev/null || true - } - - ensure_secret_key() { - ns="$1" - name="$2" - key="$3" - value="$4" - if ! kubectl -n "${ns}" get secret "${name}" >/dev/null 2>&1; then - kubectl -n "${ns}" create secret generic "${name}" --from-literal="${key}=${value}" >/dev/null - return - fi - existing="$(kubectl -n "${ns}" get secret "${name}" -o "jsonpath={.data.${key}}" 2>/dev/null || true)" - if [ -z "${existing}" ]; then - b64="$(printf '%s' "${value}" | base64 | tr -d '\n')" - payload="$(printf '{"data":{"%s":"%s"}}' "${key}" "${b64}")" - kubectl -n "${ns}" patch secret "${name}" --type=merge -p "${payload}" >/dev/null - fi - } - - ensure_chat_secret() { - ns="$1" - if ! 
kubectl -n "${ns}" get secret chat-ai-keys-runtime >/dev/null 2>&1; then - kubectl -n "${ns}" create secret generic chat-ai-keys-runtime \ - --from-literal=matrix="${CHAT_KEY_MATRIX}" \ - --from-literal=homepage="${CHAT_KEY_HOMEPAGE}" >/dev/null - return - fi - ensure_secret_key "${ns}" chat-ai-keys-runtime matrix "${CHAT_KEY_MATRIX}" - ensure_secret_key "${ns}" chat-ai-keys-runtime homepage "${CHAT_KEY_HOMEPAGE}" - } - - CHAT_KEY_MATRIX="$(get_secret_value comms chat-ai-keys-runtime matrix)" - CHAT_KEY_HOMEPAGE="$(get_secret_value comms chat-ai-keys-runtime homepage)" - if [ -z "${CHAT_KEY_MATRIX}" ] || [ -z "${CHAT_KEY_HOMEPAGE}" ]; then - ALT_MATRIX="$(get_secret_value bstein-dev-home chat-ai-keys-runtime matrix)" - ALT_HOMEPAGE="$(get_secret_value bstein-dev-home chat-ai-keys-runtime homepage)" - [ -z "${CHAT_KEY_MATRIX}" ] && CHAT_KEY_MATRIX="${ALT_MATRIX}" - [ -z "${CHAT_KEY_HOMEPAGE}" ] && CHAT_KEY_HOMEPAGE="${ALT_HOMEPAGE}" + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-comms-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 fi - [ -z "${CHAT_KEY_MATRIX}" ] && CHAT_KEY_MATRIX="$(safe_pass)" - [ -z "${CHAT_KEY_HOMEPAGE}" ] && CHAT_KEY_HOMEPAGE="$(safe_pass)" - ensure_chat_secret comms - ensure_chat_secret bstein-dev-home + vault_read() { + path="$1" + key="$2" + curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/${path}" | jq -r --arg key "${key}" '.data.data[$key] // empty' + } - ensure_secret_key comms turn-shared-secret TURN_STATIC_AUTH_SECRET "$(safe_pass)" - ensure_secret_key comms livekit-api primary 
"$(safe_pass)" - ensure_secret_key comms synapse-redis redis-password "$(safe_pass)" - ensure_secret_key comms synapse-macaroon macaroon_secret_key "$(safe_pass)" - ensure_secret_key comms atlasbot-credentials-runtime bot-password "$(safe_pass)" - ensure_secret_key comms atlasbot-credentials-runtime seeder-password "$(safe_pass)" + vault_write() { + path="$1" + key="$2" + value="$3" + payload="$(jq -nc --arg key "${key}" --arg value "${value}" '{data:{($key):$value}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/${path}" >/dev/null + } - SYN_PASS="$(get_secret_value comms synapse-db POSTGRES_PASSWORD)" - if [ -z "${SYN_PASS}" ]; then - SYN_PASS="$(safe_pass)" - kubectl -n comms create secret generic synapse-db --from-literal=POSTGRES_PASSWORD="${SYN_PASS}" >/dev/null - fi + ensure_key() { + path="$1" + key="$2" + current="$(vault_read "${path}" "${key}")" + if [ -z "${current}" ]; then + current="$(safe_pass)" + vault_write "${path}" "${key}" "${current}" + fi + printf '%s' "${current}" + } + + ensure_key "comms/turn-shared-secret" "TURN_STATIC_AUTH_SECRET" >/dev/null + ensure_key "comms/livekit-api" "primary" >/dev/null + ensure_key "comms/synapse-redis" "redis-password" >/dev/null + ensure_key "comms/synapse-macaroon" "macaroon_secret_key" >/dev/null + ensure_key "comms/atlasbot-credentials-runtime" "bot-password" >/dev/null + ensure_key "comms/atlasbot-credentials-runtime" "seeder-password" >/dev/null + + SYN_PASS="$(ensure_key "comms/synapse-db" "POSTGRES_PASSWORD")" POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" if [ -z "${POD_NAME}" ]; then diff --git a/services/comms/comms-secrets-ensure-rbac.yaml b/services/comms/comms-secrets-ensure-rbac.yaml index dfb4f21..47e41d4 100644 --- a/services/comms/comms-secrets-ensure-rbac.yaml +++ b/services/comms/comms-secrets-ensure-rbac.yaml @@ -4,6 +4,8 @@ kind: ServiceAccount metadata: name: 
comms-secrets-ensure namespace: comms +imagePullSecrets: + - name: harbor-regcred --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/services/comms/coturn.yaml b/services/comms/coturn.yaml index 12fa78a..9f3c64f 100644 --- a/services/comms/coturn.yaml +++ b/services/comms/coturn.yaml @@ -14,7 +14,44 @@ spec: metadata: labels: app: coturn + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret 
"kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: + serviceAccountName: comms-vault nodeSelector: hardware: rpi5 affinity: @@ -33,6 +70,7 @@ spec: - /bin/sh - -c - | + . 
/vault/scripts/comms_vault_env.sh exec /usr/bin/turnserver \ --no-cli \ --fingerprint \ @@ -57,11 +95,6 @@ spec: fieldPath: status.podIP - name: TURN_PUBLIC_IP value: "38.28.125.112" - - name: TURN_STATIC_AUTH_SECRET - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET ports: - name: turn-udp containerPort: 3478 @@ -76,6 +109,9 @@ spec: - name: tls mountPath: /etc/coturn/tls readOnly: true + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true resources: requests: cpu: 200m @@ -87,6 +123,10 @@ spec: - name: tls secret: secretName: turn-live-tls + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 --- apiVersion: v1 kind: Service diff --git a/services/comms/element-call-deployment.yaml b/services/comms/element-call-deployment.yaml index 7f3581d..149dcd1 100644 --- a/services/comms/element-call-deployment.yaml +++ b/services/comms/element-call-deployment.yaml @@ -19,7 +19,7 @@ spec: hardware: rpi5 containers: - name: element-call - image: ghcr.io/element-hq/element-call:latest + image: ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc ports: - containerPort: 8080 name: http diff --git a/services/comms/element-rendered.yaml b/services/comms/element-rendered.yaml deleted file mode 100644 index 0d3200e..0000000 --- a/services/comms/element-rendered.yaml +++ /dev/null @@ -1,202 +0,0 @@ ---- -# Source: element-web/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: othrys-element-element-web - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm ---- -# Source: element-web/templates/configuration-nginx.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-element-element-web-nginx - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: 
element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm -data: - default.conf: | - server { - listen 8080; - listen [::]:8080; - server_name localhost; - - root /usr/share/nginx/html; - index index.html; - - add_header X-Frame-Options SAMEORIGIN; - add_header X-Content-Type-Options nosniff; - add_header X-XSS-Protection "1; mode=block"; - add_header Content-Security-Policy "frame-ancestors 'self'"; - - # Set no-cache for the index.html only so that browsers always check for a new copy of Element Web. - location = /index.html { - add_header Cache-Control "no-cache"; - } - - # redirect server error pages to the static page /50x.html - # - error_page 500 502 503 504 /50x.html; - } ---- -# Source: element-web/templates/configuration.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-element-element-web - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm -data: - config.json: | - {"brand":"Othrys","default_server_config":{"m.homeserver":{"base_url":"https://matrix.live.bstein.dev","server_name":"live.bstein.dev"},"m.identity_server":{"base_url":"https://vector.im"}},"default_theme":"dark","disable_custom_urls":true,"disable_login_language_selector":true,"disable_guests":false,"registration_url":"https://bstein.dev/request-access","show_labs_settings":true,"features":{"feature_group_calls":true,"feature_video_rooms":true,"feature_element_call_video_rooms":true},"room_directory":{"servers":["live.bstein.dev"]},"jitsi":{},"element_call":{"url":"https://call.live.bstein.dev","participant_limit":16,"brand":"Othrys Call"}} ---- -# Source: element-web/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: othrys-element-element-web - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - 
app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 80 - targetPort: http - protocol: TCP - name: http - selector: - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element ---- -# Source: element-web/templates/deployment.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: othrys-element-element-web - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - template: - metadata: - annotations: - checksum/config: manual-rtc-enable-1 - checksum/config-nginx: 085061d0925f4840c3770233509dc0b00fe8fa1a5fef8bf282a514fd101c76fa - labels: - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - spec: - serviceAccountName: othrys-element-element-web - securityContext: - {} - containers: - - name: element-web - securityContext: - {} - image: "ghcr.io/element-hq/element-web:v1.12.6" - imagePullPolicy: IfNotPresent - env: - - name: ELEMENT_WEB_PORT - value: '8080' - ports: - - name: http - containerPort: 8080 - protocol: TCP - livenessProbe: - httpGet: - path: / - port: http - readinessProbe: - httpGet: - path: / - port: http - resources: - limits: - cpu: 500m - memory: 512Mi - requests: - cpu: 100m - memory: 256Mi - volumeMounts: - - mountPath: /app/config.json - name: config - subPath: config.json - - mountPath: /etc/nginx/conf.d/config.json - name: config-nginx - subPath: config.json - volumes: - - name: config - configMap: - name: othrys-element-element-web - - name: config-nginx - configMap: - name: othrys-element-element-web-nginx - nodeSelector: - hardware: rpi5 - affinity: - nodeAffinity: - 
preferredDuringSchedulingIgnoredDuringExecution: - - preference: - matchExpressions: - - key: hardware - operator: In - values: - - rpi5 - - rpi4 - weight: 50 ---- -# Source: element-web/templates/ingress.yaml -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: othrys-element-element-web - labels: - helm.sh/chart: element-web-1.4.26 - app.kubernetes.io/name: element-web - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/version: "1.12.6" - app.kubernetes.io/managed-by: Helm - annotations: - cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.entrypoints: websecure -spec: - ingressClassName: traefik - tls: - - hosts: - - "live.bstein.dev" - secretName: live-othrys-tls - rules: - - host: "live.bstein.dev" - http: - paths: - - path: / - backend: - service: - name: othrys-element-element-web - port: - number: 80 - pathType: Prefix diff --git a/services/comms/guest-name-job.yaml b/services/comms/guest-name-job.yaml index 156617d..21a8af5 100644 --- a/services/comms/guest-name-job.yaml +++ b/services/comms/guest-name-job.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: guest-name-randomizer namespace: comms + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "*/1 * * * *" suspend: false @@ -14,21 +16,60 @@ spec: spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end 
-}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + 
vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + serviceAccountName: comms-vault + nodeSelector: + hardware: rpi5 volumes: - - name: mas-admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 containers: - name: rename - image: python:3.11-slim + image: registry.bstein.dev/bstein/comms-guest-tools:0.1.0 volumeMounts: - - name: mas-admin-client - mountPath: /etc/mas-admin-client + - name: vault-scripts + mountPath: /vault/scripts readOnly: true env: - name: SYNAPSE_BASE @@ -36,7 +77,7 @@ spec: - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas-admin-client/client_secret + value: /vault/secrets/mas-admin-secret - name: MAS_ADMIN_API_BASE value: http://matrix-authentication-service:8081/api/admin/v1 - name: MAS_TOKEN_URL @@ -51,17 +92,12 @@ spec: value: synapse - name: PGUSER value: synapse - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD command: - /bin/sh - -c - | set -euo pipefail - pip install --no-cache-dir requests psycopg2-binary >/dev/null + . 
/vault/scripts/comms_vault_env.sh python - <<'PY' import base64 import os @@ -90,6 +126,7 @@ spec: SEEDER_USER = os.environ["SEEDER_USER"] ROOM_ALIAS = "#othrys:live.bstein.dev" SERVER_NAME = "live.bstein.dev" + STALE_GUEST_MS = 14 * 24 * 60 * 60 * 1000 def mas_admin_token(): with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: @@ -202,6 +239,35 @@ spec: break return users + def should_prune_guest(entry, now_ms): + if not entry.get("is_guest"): + return False + last_seen = entry.get("last_seen_ts") + if last_seen is None: + return False + try: + last_seen = int(last_seen) + except (TypeError, ValueError): + return False + return now_ms - last_seen > STALE_GUEST_MS + + def prune_guest(token, user_id): + headers = {"Authorization": f"Bearer {token}"} + try: + r = requests.delete( + f"{BASE}/_synapse/admin/v2/users/{urllib.parse.quote(user_id)}", + headers=headers, + params={"erase": "true"}, + timeout=30, + ) + except Exception as exc: # noqa: BLE001 + print(f"guest prune failed for {user_id}: {exc}") + return False + if r.status_code in (200, 202, 204, 404): + return True + print(f"guest prune failed for {user_id}: {r.status_code} {r.text}") + return False + def user_id_for_username(username): return f"@{username}:live.bstein.dev" @@ -371,6 +437,7 @@ spec: except Exception as exc: # noqa: BLE001 print(f"synapse admin list skipped: {exc}") entries = [] + now_ms = int(time.time() * 1000) for entry in entries: user_id = entry.get("name") or "" if not user_id.startswith("@"): @@ -379,6 +446,9 @@ spec: if localpart in mas_usernames: continue is_guest = entry.get("is_guest") + if is_guest and should_prune_guest(entry, now_ms): + if prune_guest(seeder_token, user_id): + continue if not (is_guest or needs_rename_username(localpart)): continue display = get_displayname_admin(seeder_token, user_id) diff --git a/services/comms/guest-register-deployment.yaml b/services/comms/guest-register-deployment.yaml index 284cc42..04a0018 100644 --- 
a/services/comms/guest-register-deployment.yaml +++ b/services/comms/guest-register-deployment.yaml @@ -14,9 +14,24 @@ spec: metadata: annotations: checksum/config: guest-register-proxy-5 + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} labels: app.kubernetes.io/name: matrix-guest-register spec: + serviceAccountName: comms-vault + hostAliases: + - ip: "10.43.36.27" + hostnames: + - "matrix-authentication-service" + - "matrix-authentication-service.comms.svc.cluster.local" + - ip: "10.43.216.45" + hostnames: + - "othrys-synapse-matrix-synapse" + - "othrys-synapse-matrix-synapse.comms.svc.cluster.local" securityContext: runAsNonRoot: true runAsUser: 10001 @@ -42,7 +57,7 @@ spec: - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas/admin-client/client_secret + value: /vault/secrets/mas-admin-secret - name: MAS_ADMIN_API_BASE value: http://matrix-authentication-service:8081/api/admin/v1 - name: SYNAPSE_BASE @@ -83,9 +98,6 @@ spec: mountPath: /app/server.py subPath: server.py readOnly: true - - name: mas-admin-client - mountPath: /etc/mas/admin-client - readOnly: true command: - python - /app/server.py @@ -96,9 +108,3 @@ spec: items: - key: server.py path: server.py - - name: mas-admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret diff --git a/services/comms/helmrelease.yaml b/services/comms/helmrelease.yaml new file mode 100644 index 0000000..4456348 --- /dev/null +++ b/services/comms/helmrelease.yaml @@ -0,0 +1,428 @@ +# services/comms/helmrelease.yaml +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: 
othrys-synapse + namespace: comms +spec: + interval: 30m + chart: + spec: + chart: matrix-synapse + version: 3.12.17 + sourceRef: + kind: HelmRepository + name: ananace + namespace: flux-system + install: + remediation: { retries: 3 } + timeout: 15m + upgrade: + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 15m + values: + serverName: live.bstein.dev + publicServerName: matrix.live.bstein.dev + + config: + publicBaseurl: https://matrix.live.bstein.dev + registrationSharedSecret: "vault-managed" + + serviceAccount: + create: false + name: comms-vault + + externalPostgresql: + host: postgres-service.postgres.svc.cluster.local + port: 5432 + username: synapse + existingSecret: vault-placeholder + existingSecretPasswordKey: postgres-password + database: synapse + + redis: + enabled: true + auth: + enabled: true + existingSecret: vault-placeholder + existingSecretPasswordKey: redis-password + + postgresql: + enabled: false + + persistence: + enabled: true + storageClass: asteria + accessMode: ReadWriteOnce + size: 50Gi + + synapse: + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + podSecurityContext: + fsGroup: 666 + runAsUser: 666 + runAsGroup: 666 + resources: + requests: + cpu: 500m + memory: 1Gi + limits: + cpu: "2" + memory: 3Gi + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-synapse-env.sh: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-env.sh: | + {{ with secret "kv/data/atlas/comms/synapse-db" }} + export POSTGRES_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/synapse-redis" }} + export REDIS_PASSWORD="{{ index .Data.data "redis-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/turn-shared-secret" }} + export TURN_SECRET="{{ .Data.data.TURN_STATIC_AUTH_SECRET }}" + {{ end }} + {{ with secret 
"kv/data/atlas/comms/mas-secrets-runtime" }} + export MAS_SHARED_SECRET="{{ .Data.data.matrix_shared_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/synapse-registration" }} + export REGISTRATION_SHARED_SECRET="{{ .Data.data.registration_shared_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/comms/synapse-macaroon" }} + export MACAROON_SECRET_KEY="{{ .Data.data.macaroon_secret_key }}" + {{ end }} + vault.hashicorp.com/agent-inject-secret-synapse-signingkey: "kv/data/atlas/comms/othrys-synapse-signingkey" + vault.hashicorp.com/agent-inject-template-synapse-signingkey: | + {{ with secret "kv/data/atlas/comms/othrys-synapse-signingkey" }} + {{ index .Data.data "signing.key" }} + {{ end }} + extraEnv: [] + extraCommands: + - >- + esc() { printf "%s" "$1" | sed "s/'/''/g"; }; + printf '%s\n' + "matrix_authentication_service:" + " enabled: true" + " endpoint: http://matrix-authentication-service:8080/" + " secret: '$(esc "${MAS_SHARED_SECRET:-}")'" + "registration_shared_secret: '$(esc "${REGISTRATION_SHARED_SECRET:-}")'" + "turn_shared_secret: '$(esc "${TURN_SECRET:-}")'" + "macaroon_secret_key: '$(esc "${MACAROON_SECRET_KEY:-}")'" + > /synapse/config/conf.d/runtime-secrets.yaml + nodeSelector: + hardware: rpi5 + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5", "rpi4"] + + ingress: + enabled: false + + extraConfig: + allow_guest_access: true + allow_public_rooms_without_auth: true + auto_join_rooms: + - "#othrys:live.bstein.dev" + autocreate_auto_join_rooms: true + default_room_version: "11" + experimental_features: + msc3266_enabled: true + msc4143_enabled: true + msc4222_enabled: true + max_event_delay_duration: 24h + password_config: + enabled: false + rc_message: + per_second: 0.5 + burst_count: 30 + rc_delayed_event_mgmt: + per_second: 1 + burst_count: 20 + rc_login: + address: + burst_count: 20 + per_second: 5 + 
account: + burst_count: 20 + per_second: 5 + failed_attempts: + burst_count: 20 + per_second: 5 + room_list_publication_rules: + - action: allow + turn_uris: + - "turn:turn.live.bstein.dev:3478?transport=udp" + - "turn:turn.live.bstein.dev:3478?transport=tcp" + - "turns:turn.live.bstein.dev:5349?transport=tcp" + turn_allow_guests: true + turn_user_lifetime: 86400000 + well_known_client: + "m.homeserver": + "base_url": "https://matrix.live.bstein.dev" + "org.matrix.msc2965.authentication": + "issuer": "https://matrix.live.bstein.dev/" + "account": "https://matrix.live.bstein.dev/account/" + "org.matrix.msc4143.rtc_foci": + - type: "livekit" + livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" + + worker: + enabled: false + + signingkey: + job: + enabled: false + existingSecret: vault-placeholder + existingSecretKey: signing.key + postRenderers: + - kustomize: + patches: + - target: + kind: Deployment + name: othrys-synapse-matrix-synapse + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: othrys-synapse-matrix-synapse + spec: + template: + spec: + serviceAccountName: comms-vault + automountServiceAccountToken: true + hostAliases: + - ip: "10.43.150.98" + hostnames: + - "othrys-synapse-redis-master" + - "othrys-synapse-redis-master.comms.svc.cluster.local" + - ip: "10.43.36.27" + hostnames: + - "matrix-authentication-service" + - "matrix-authentication-service.comms.svc.cluster.local" + containers: + - name: synapse + command: + - /entrypoint.sh + args: + - sh + - -c + - |- + export POSTGRES_PASSWORD=$(echo "${POSTGRES_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') + export REDIS_PASSWORD=$(echo "${REDIS_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') + cat /synapse/secrets/*.yaml | \ + sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ + -e "s/@@REDIS_PASSWORD@@/${REDIS_PASSWORD:-}/" \ + > /synapse/config/conf.d/secrets.yaml + + esc() { printf "%s" "$1" | sed "s/'/''/g"; }; + printf '%s\n' \ + 
"matrix_authentication_service:" \ + " enabled: true" \ + " endpoint: http://matrix-authentication-service:8080/" \ + " secret: '$(esc "${MAS_SHARED_SECRET:-}")'" \ + "registration_shared_secret: '$(esc "${REGISTRATION_SHARED_SECRET:-}")'" \ + "turn_shared_secret: '$(esc "${TURN_SECRET:-}")'" \ + "macaroon_secret_key: '$(esc "${MACAROON_SECRET_KEY:-}")'" \ + > /synapse/config/conf.d/runtime-secrets.yaml + + exec python -B -m synapse.app.homeserver \ + -c /synapse/config/homeserver.yaml \ + -c /synapse/config/conf.d/ + env: + - $patch: replace + - name: VAULT_ENV_FILE + value: /vault/secrets/synapse-env.sh + - name: VAULT_COPY_FILES + value: /vault/secrets/synapse-signingkey:/synapse/keys/signing.key + volumeMounts: + - $patch: replace + - name: comms-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + - name: config + mountPath: /synapse/config + - name: tmpconf + mountPath: /synapse/config/conf.d + - name: secrets + mountPath: /synapse/secrets + - name: signingkey-writable + mountPath: /synapse/keys + - name: media + mountPath: /synapse/data + - name: tmpdir + mountPath: /tmp + volumes: + - name: signingkey + $patch: delete + - name: comms-vault-entrypoint + configMap: + name: comms-vault-entrypoint + defaultMode: 493 + - name: signingkey-writable + emptyDir: {} + - target: + kind: Deployment + name: othrys-synapse-redis-master + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: othrys-synapse-redis-master + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-redis-env.sh: "kv/data/atlas/comms/synapse-redis" + vault.hashicorp.com/agent-inject-template-redis-env.sh: | + {{ with secret "kv/data/atlas/comms/synapse-redis" }} + export REDIS_PASSWORD="{{ index .Data.data "redis-password" }}" + {{ end }} + spec: + serviceAccountName: comms-vault + automountServiceAccountToken: true + containers: + - name: redis 
+ command: + - /entrypoint.sh + args: + - /bin/bash + - -c + - /opt/bitnami/scripts/start-scripts/start-master.sh + env: + - name: REDIS_PASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/redis-env.sh + livenessProbe: + exec: + command: + - sh + - -c + - . /vault/secrets/redis-env.sh && /health/ping_liveness_local.sh 5 + readinessProbe: + exec: + command: + - sh + - -c + - . /vault/secrets/redis-env.sh && /health/ping_readiness_local.sh 1 + volumeMounts: + - name: comms-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: comms-vault-entrypoint + configMap: + name: comms-vault-entrypoint + defaultMode: 493 +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: othrys-element + namespace: comms +spec: + interval: 30m + chart: + spec: + chart: element-web + version: 1.4.26 + sourceRef: + kind: HelmRepository + name: ananace + namespace: flux-system + install: + remediation: { retries: 3 } + timeout: 10m + upgrade: + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 10m + values: + replicaCount: 1 + + defaultServer: + url: https://matrix.live.bstein.dev + name: live.bstein.dev + + config: + default_server_name: live.bstein.dev + default_theme: dark + brand: Othrys + disable_custom_urls: true + disable_login_language_selector: true + disable_guests: false + show_labs_settings: true + features: + feature_group_calls: true + feature_video_rooms: true + feature_element_call_video_rooms: true + room_directory: + servers: + - live.bstein.dev + jitsi: {} + element_call: + url: https://call.live.bstein.dev + participant_limit: 16 + brand: Othrys Call + extraVolumes: + - name: element-host-config + configMap: + name: othrys-element-host-config + defaultMode: 0555 + extraVolumeMounts: + - name: element-host-config + mountPath: /docker-entrypoint.d/20-host-config.sh + subPath: 20-host-config.sh + readOnly: true + + ingress: + enabled: true + 
className: traefik + annotations: + cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + hosts: + - live.bstein.dev + tls: + - secretName: live-othrys-tls + hosts: [live.bstein.dev] + + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + + nodeSelector: + hardware: rpi5 + + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5", "rpi4"] diff --git a/services/comms/knowledge/catalog/atlas-summary.json b/services/comms/knowledge/catalog/atlas-summary.json index 2139e29..fa35051 100644 --- a/services/comms/knowledge/catalog/atlas-summary.json +++ b/services/comms/knowledge/catalog/atlas-summary.json @@ -1,8 +1,8 @@ { "counts": { - "helmrelease_host_hints": 7, - "http_endpoints": 35, - "services": 44, - "workloads": 49 + "helmrelease_host_hints": 17, + "http_endpoints": 37, + "services": 43, + "workloads": 54 } } diff --git a/services/comms/knowledge/catalog/atlas.json b/services/comms/knowledge/catalog/atlas.json index 92f08f4..0d97bcd 100644 --- a/services/comms/knowledge/catalog/atlas.json +++ b/services/comms/knowledge/catalog/atlas.json @@ -12,12 +12,7 @@ "targetNamespace": "bstein-dev-home" }, { - "name": "ci-demo", - "path": "services/ci-demo", - "targetNamespace": null - }, - { - "name": "communication", + "name": "comms", "path": "services/comms", "targetNamespace": "comms" }, @@ -71,6 +66,11 @@ "path": "services/keycloak", "targetNamespace": "sso" }, + { + "name": "logging", + "path": "services/logging", + "targetNamespace": null + }, { "name": "longhorn-ui", "path": "infrastructure/longhorn/ui-ingress", @@ -81,6 +81,11 @@ "path": "services/mailu", "targetNamespace": "mailu-mailserver" }, + { + "name": "maintenance", + "path": "services/maintenance", + "targetNamespace": null + }, { "name": 
"metallb", "path": "infrastructure/metallb", @@ -116,11 +121,26 @@ "path": "services/openldap", "targetNamespace": "sso" }, + { + "name": "outline", + "path": "services/outline", + "targetNamespace": "outline" + }, { "name": "pegasus", "path": "services/pegasus", "targetNamespace": "jellyfin" }, + { + "name": "planka", + "path": "services/planka", + "targetNamespace": "planka" + }, + { + "name": "postgres", + "path": "infrastructure/postgres", + "targetNamespace": "postgres" + }, { "name": "sui-metrics", "path": "services/sui-metrics/overlays/atlas", @@ -163,7 +183,7 @@ "serviceAccountName": null, "nodeSelector": {}, "images": [ - "ollama/ollama:latest" + "ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d" ] }, { @@ -179,7 +199,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84" + "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92" ] }, { @@ -195,7 +215,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84" + "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92" ] }, { @@ -214,21 +234,6 @@ "python:3.11-slim" ] }, - { - "kind": "Deployment", - "namespace": "ci-demo", - "name": "ci-demo", - "labels": { - "app.kubernetes.io/name": "ci-demo" - }, - "serviceAccountName": null, - "nodeSelector": { - "hardware": "rpi4" - }, - "images": [ - "registry.bstein.dev/infra/ci-demo:v0.0.0-3" - ] - }, { "kind": "Deployment", "namespace": "comms", @@ -271,7 +276,7 @@ "hardware": "rpi5" }, "images": [ - "ghcr.io/element-hq/element-call:latest" + "ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc" ] }, { @@ -345,56 +350,6 @@ "nginx:1.27-alpine" ] }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-element-element-web", - "labels": { - "app.kubernetes.io/instance": "othrys-element", - "app.kubernetes.io/name": 
"element-web" - }, - "serviceAccountName": "othrys-element-element-web", - "nodeSelector": { - "hardware": "rpi5" - }, - "images": [ - "ghcr.io/element-hq/element-web:v1.12.6" - ] - }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-synapse-matrix-synapse", - "labels": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "serviceAccountName": "default", - "nodeSelector": { - "hardware": "rpi5" - }, - "images": [ - "ghcr.io/element-hq/synapse:v1.144.0" - ] - }, - { - "kind": "Deployment", - "namespace": "comms", - "name": "othrys-synapse-redis-master", - "labels": { - "app.kubernetes.io/component": "master", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/managed-by": "Helm", - "app.kubernetes.io/name": "redis", - "helm.sh/chart": "redis-17.17.1" - }, - "serviceAccountName": "othrys-synapse-redis", - "nodeSelector": {}, - "images": [ - "docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34" - ] - }, { "kind": "DaemonSet", "namespace": "crypto", @@ -407,7 +362,7 @@ "node-role.kubernetes.io/worker": "true" }, "images": [ - "ghcr.io/tari-project/xmrig:latest" + "ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9" ] }, { @@ -681,6 +636,66 @@ "hashicorp/vault-csi-provider:1.7.0" ] }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-image-gc-rpi4", + "labels": { + "app": "node-image-gc-rpi4" + }, + "serviceAccountName": "node-image-gc-rpi4", + "nodeSelector": { + "hardware": "rpi4" + }, + "images": [ + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-image-prune-rpi5", + "labels": { + "app": "node-image-prune-rpi5" + }, + "serviceAccountName": "node-image-prune-rpi5", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + 
"bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "DaemonSet", + "namespace": "logging", + "name": "node-log-rotation", + "labels": { + "app": "node-log-rotation" + }, + "serviceAccountName": "node-log-rotation", + "nodeSelector": { + "hardware": "rpi5" + }, + "images": [ + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" + ] + }, + { + "kind": "Deployment", + "namespace": "logging", + "name": "oauth2-proxy-logs", + "labels": { + "app": "oauth2-proxy-logs" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "quay.io/oauth2-proxy/oauth2-proxy:v7.6.0" + ] + }, { "kind": "Deployment", "namespace": "longhorn-system", @@ -708,7 +723,7 @@ "mailu.bstein.dev/vip": "true" }, "images": [ - "lachlanevenson/k8s-kubectl:latest" + "registry.bstein.dev/bstein/kubectl:1.35.0" ] }, { @@ -726,37 +741,30 @@ }, { "kind": "DaemonSet", - "namespace": "metallb-system", - "name": "metallb-speaker", + "namespace": "maintenance", + "name": "node-image-sweeper", "labels": { - "app.kubernetes.io/component": "speaker", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" + "app": "node-image-sweeper" }, - "serviceAccountName": "metallb-speaker", + "serviceAccountName": "node-image-sweeper", "nodeSelector": { "kubernetes.io/os": "linux" }, "images": [ - "quay.io/frrouting/frr:10.4.1", - "quay.io/metallb/speaker:v0.15.3" + "python:3.12.9-alpine3.20" ] }, { - "kind": "Deployment", - "namespace": "metallb-system", - "name": "metallb-controller", + "kind": "DaemonSet", + "namespace": "maintenance", + "name": "node-nofile", "labels": { - "app.kubernetes.io/component": "controller", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" - }, - "serviceAccountName": "metallb-controller", - "nodeSelector": { - "kubernetes.io/os": "linux" + "app": "node-nofile" }, + 
"serviceAccountName": "node-nofile", + "nodeSelector": {}, "images": [ - "quay.io/metallb/controller:v0.15.3" + "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131" ] }, { @@ -772,6 +780,21 @@ "registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04" ] }, + { + "kind": "DaemonSet", + "namespace": "monitoring", + "name": "jetson-tegrastats-exporter", + "labels": { + "app": "jetson-tegrastats-exporter" + }, + "serviceAccountName": "default", + "nodeSelector": { + "jetson": "true" + }, + "images": [ + "python:3.10-slim" + ] + }, { "kind": "Deployment", "namespace": "monitoring", @@ -797,7 +820,7 @@ "hardware": "rpi5" }, "images": [ - "collabora/code:latest" + "collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26" ] }, { @@ -815,6 +838,66 @@ "nextcloud:29-apache" ] }, + { + "kind": "Deployment", + "namespace": "outline", + "name": "outline", + "labels": { + "app": "outline" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "outlinewiki/outline:1.2.0" + ] + }, + { + "kind": "Deployment", + "namespace": "outline", + "name": "outline-redis", + "labels": { + "app": "outline-redis" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "redis:7.4.1-alpine" + ] + }, + { + "kind": "Deployment", + "namespace": "planka", + "name": "planka", + "labels": { + "app": "planka" + }, + "serviceAccountName": null, + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "ghcr.io/plankanban/planka:2.0.0-rc.4" + ] + }, + { + "kind": "StatefulSet", + "namespace": "postgres", + "name": "postgres", + "labels": { + "app": "postgres" + }, + "serviceAccountName": "postgres-vault", + "nodeSelector": { + "node-role.kubernetes.io/worker": "true" + }, + "images": [ + "postgres:15" + ] + }, { "kind": "Deployment", "namespace": "sso", @@ -984,22 +1067,6 @@ } ] 
}, - { - "namespace": "ci-demo", - "name": "ci-demo", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/name": "ci-demo" - }, - "ports": [ - { - "name": "http", - "port": 80, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, { "namespace": "comms", "name": "coturn", @@ -1454,94 +1521,6 @@ } ] }, - { - "namespace": "comms", - "name": "othrys-element-element-web", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/instance": "othrys-element", - "app.kubernetes.io/name": "element-web" - }, - "ports": [ - { - "name": "http", - "port": 80, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-matrix-synapse", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "ports": [ - { - "name": "http", - "port": 8008, - "targetPort": "http", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-redis-headless", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "redis" - }, - "ports": [ - { - "name": "tcp-redis", - "port": 6379, - "targetPort": "redis", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-redis-master", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "master", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "redis" - }, - "ports": [ - { - "name": "tcp-redis", - "port": 6379, - "targetPort": "redis", - "protocol": "TCP" - } - ] - }, - { - "namespace": "comms", - "name": "othrys-synapse-replication", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "synapse", - "app.kubernetes.io/instance": "othrys-synapse", - "app.kubernetes.io/name": "matrix-synapse" - }, - "ports": [ - { - "name": "replication", - "port": 9093, - "targetPort": "replication", - 
"protocol": "TCP" - } - ] - }, { "namespace": "crypto", "name": "monerod", @@ -1743,6 +1722,22 @@ } ] }, + { + "namespace": "logging", + "name": "oauth2-proxy-logs", + "type": "ClusterIP", + "selector": { + "app": "oauth2-proxy-logs" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": 4180, + "protocol": "TCP" + } + ] + }, { "namespace": "longhorn-system", "name": "oauth2-proxy-longhorn", @@ -1823,24 +1818,6 @@ } ] }, - { - "namespace": "metallb-system", - "name": "metallb-webhook-service", - "type": "ClusterIP", - "selector": { - "app.kubernetes.io/component": "controller", - "app.kubernetes.io/instance": "metallb", - "app.kubernetes.io/name": "metallb" - }, - "ports": [ - { - "name": null, - "port": 443, - "targetPort": 9443, - "protocol": "TCP" - } - ] - }, { "namespace": "monitoring", "name": "dcgm-exporter", @@ -1857,6 +1834,22 @@ } ] }, + { + "namespace": "monitoring", + "name": "jetson-tegrastats-exporter", + "type": "ClusterIP", + "selector": { + "app": "jetson-tegrastats-exporter" + }, + "ports": [ + { + "name": "metrics", + "port": 9100, + "targetPort": "metrics", + "protocol": "TCP" + } + ] + }, { "namespace": "monitoring", "name": "postmark-exporter", @@ -1905,6 +1898,70 @@ } ] }, + { + "namespace": "outline", + "name": "outline", + "type": "ClusterIP", + "selector": { + "app": "outline" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "outline", + "name": "outline-redis", + "type": "ClusterIP", + "selector": { + "app": "outline-redis" + }, + "ports": [ + { + "name": "redis", + "port": 6379, + "targetPort": "redis", + "protocol": "TCP" + } + ] + }, + { + "namespace": "planka", + "name": "planka", + "type": "ClusterIP", + "selector": { + "app": "planka" + }, + "ports": [ + { + "name": "http", + "port": 80, + "targetPort": "http", + "protocol": "TCP" + } + ] + }, + { + "namespace": "postgres", + "name": "postgres-service", + "type": "ClusterIP", + 
"selector": { + "app": "postgres" + }, + "ports": [ + { + "name": "postgres", + "port": 5432, + "targetPort": 5432, + "protocol": "TCP" + } + ] + }, { "namespace": "sso", "name": "keycloak", @@ -2110,7 +2167,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-bstein-dev", - "source": "communication" + "source": "comms" } }, { @@ -2130,7 +2187,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-bstein-dev", - "source": "communication" + "source": "comms" } }, { @@ -2170,7 +2227,7 @@ "via": { "kind": "Ingress", "name": "element-call", - "source": "communication" + "source": "comms" } }, { @@ -2250,7 +2307,7 @@ "via": { "kind": "Ingress", "name": "livekit-jwt-ingress", - "source": "communication" + "source": "comms" } }, { @@ -2270,27 +2327,7 @@ "via": { "kind": "Ingress", "name": "livekit-ingress", - "source": "communication" - } - }, - { - "host": "live.bstein.dev", - "path": "/", - "backend": { - "namespace": "comms", - "service": "othrys-element-element-web", - "port": 80, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-element-element-web" - } - ] - }, - "via": { - "kind": "Ingress", - "name": "othrys-element-element-web", - "source": "communication" + "source": "comms" } }, { @@ -2310,7 +2347,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown", - "source": "communication" + "source": "comms" } }, { @@ -2330,7 +2367,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown", - "source": "communication" + "source": "comms" } }, { @@ -2340,17 +2377,32 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, + "workloads": [] + }, + "via": { + "kind": "Ingress", + "name": "matrix-routing", + "source": "comms" + } + }, + { + "host": "logs.bstein.dev", + "path": "/", + "backend": { + "namespace": "logging", + "service": "oauth2-proxy-logs", + "port": "http", "workloads": [ { "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" + "name": "oauth2-proxy-logs" } ] }, "via": { "kind": "Ingress", - 
"name": "matrix-routing", - "source": "communication" + "name": "logs", + "source": "logging" } }, { @@ -2405,7 +2457,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2425,7 +2477,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-matrix-live", - "source": "communication" + "source": "comms" } }, { @@ -2445,7 +2497,7 @@ "via": { "kind": "Ingress", "name": "matrix-wellknown-matrix-live", - "source": "communication" + "source": "comms" } }, { @@ -2455,17 +2507,12 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" - } - ] + "workloads": [] }, "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2485,7 +2532,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2505,7 +2552,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2525,7 +2572,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2545,7 +2592,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2565,7 +2612,7 @@ "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2575,17 +2622,12 @@ "namespace": "comms", "service": "othrys-synapse-matrix-synapse", "port": 8008, - "workloads": [ - { - "kind": "Deployment", - "name": "othrys-synapse-matrix-synapse" - } - ] + "workloads": [] }, "via": { "kind": "Ingress", "name": "matrix-routing", - "source": "communication" + "source": "comms" } }, { @@ -2608,6 +2650,26 @@ "source": "monerod" } }, + { + "host": "notes.bstein.dev", + "path": "/", + "backend": { + "namespace": "outline", + "service": "outline", + 
"port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "outline" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "outline", + "source": "outline" + } + }, { "host": "office.bstein.dev", "path": "/", @@ -2728,6 +2790,26 @@ "source": "jellyfin" } }, + { + "host": "tasks.bstein.dev", + "path": "/", + "backend": { + "namespace": "planka", + "service": "planka", + "port": 80, + "workloads": [ + { + "kind": "Deployment", + "name": "planka" + } + ] + }, + "via": { + "kind": "Ingress", + "name": "planka", + "source": "planka" + } + }, { "host": "vault.bstein.dev", "path": "/", @@ -2750,12 +2832,27 @@ } ], "helmrelease_host_hints": { + "comms:comms/othrys-element": [ + "call.live.bstein.dev", + "live.bstein.dev", + "matrix.live.bstein.dev" + ], + "comms:comms/othrys-synapse": [ + "bstein.dev", + "kit.live.bstein.dev", + "live.bstein.dev", + "matrix.live.bstein.dev", + "turn.live.bstein.dev" + ], "gitops-ui:flux-system/weave-gitops": [ "cd.bstein.dev" ], "harbor:harbor/harbor": [ "registry.bstein.dev" ], + "logging:logging/data-prepper": [ + "registry.bstein.dev" + ], "mailu:mailu-mailserver/mailu": [ "bstein.dev", "mail.bstein.dev" @@ -2764,6 +2861,7 @@ "alerts.bstein.dev" ], "monitoring:monitoring/grafana": [ + "bstein.dev", "metrics.bstein.dev", "sso.bstein.dev" ] diff --git a/services/comms/knowledge/catalog/atlas.yaml b/services/comms/knowledge/catalog/atlas.yaml index 06e2469..6529e1a 100644 --- a/services/comms/knowledge/catalog/atlas.yaml +++ b/services/comms/knowledge/catalog/atlas.yaml @@ -1,3 +1,4 @@ +# services/comms/knowledge/catalog/atlas.yaml # Generated by scripts/knowledge_render_atlas.py (do not edit by hand) cluster: atlas sources: @@ -7,10 +8,7 @@ sources: - name: bstein-dev-home path: services/bstein-dev-home targetNamespace: bstein-dev-home -- name: ci-demo - path: services/ci-demo - targetNamespace: null -- name: communication +- name: comms path: services/comms targetNamespace: comms - name: core @@ -43,12 +41,18 @@ sources: - name: 
keycloak path: services/keycloak targetNamespace: sso +- name: logging + path: services/logging + targetNamespace: null - name: longhorn-ui path: infrastructure/longhorn/ui-ingress targetNamespace: longhorn-system - name: mailu path: services/mailu targetNamespace: mailu-mailserver +- name: maintenance + path: services/maintenance + targetNamespace: null - name: metallb path: infrastructure/metallb targetNamespace: metallb-system @@ -70,9 +74,18 @@ sources: - name: openldap path: services/openldap targetNamespace: sso +- name: outline + path: services/outline + targetNamespace: outline - name: pegasus path: services/pegasus targetNamespace: jellyfin +- name: planka + path: services/planka + targetNamespace: planka +- name: postgres + path: infrastructure/postgres + targetNamespace: postgres - name: sui-metrics path: services/sui-metrics/overlays/atlas targetNamespace: sui-metrics @@ -100,7 +113,7 @@ workloads: serviceAccountName: null nodeSelector: {} images: - - ollama/ollama:latest + - ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d - kind: Deployment namespace: bstein-dev-home name: bstein-dev-home-backend @@ -111,7 +124,7 @@ workloads: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: 'true' images: - - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 + - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 - kind: Deployment namespace: bstein-dev-home name: bstein-dev-home-frontend @@ -122,7 +135,7 @@ workloads: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: 'true' images: - - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 + - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 - kind: Deployment namespace: bstein-dev-home name: chat-ai-gateway @@ -134,16 +147,6 @@ workloads: node-role.kubernetes.io/worker: 'true' images: - python:3.11-slim -- kind: Deployment - namespace: ci-demo - name: ci-demo - labels: - app.kubernetes.io/name: ci-demo - serviceAccountName: 
null - nodeSelector: - hardware: rpi4 - images: - - registry.bstein.dev/infra/ci-demo:v0.0.0-3 - kind: Deployment namespace: comms name: atlasbot @@ -173,7 +176,7 @@ workloads: nodeSelector: hardware: rpi5 images: - - ghcr.io/element-hq/element-call:latest + - ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc - kind: Deployment namespace: comms name: livekit @@ -222,42 +225,6 @@ workloads: nodeSelector: {} images: - nginx:1.27-alpine -- kind: Deployment - namespace: comms - name: othrys-element-element-web - labels: - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/name: element-web - serviceAccountName: othrys-element-element-web - nodeSelector: - hardware: rpi5 - images: - - ghcr.io/element-hq/element-web:v1.12.6 -- kind: Deployment - namespace: comms - name: othrys-synapse-matrix-synapse - labels: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - serviceAccountName: default - nodeSelector: - hardware: rpi5 - images: - - ghcr.io/element-hq/synapse:v1.144.0 -- kind: Deployment - namespace: comms - name: othrys-synapse-redis-master - labels: - app.kubernetes.io/component: master - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - serviceAccountName: othrys-synapse-redis - nodeSelector: {} - images: - - docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 - kind: DaemonSet namespace: crypto name: monero-xmrig @@ -267,7 +234,7 @@ workloads: nodeSelector: node-role.kubernetes.io/worker: 'true' images: - - ghcr.io/tari-project/xmrig:latest + - ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9 - kind: Deployment namespace: crypto name: monero-p2pool @@ -460,6 +427,46 @@ workloads: kubernetes.io/os: linux images: - hashicorp/vault-csi-provider:1.7.0 +- kind: DaemonSet + namespace: logging 
+ name: node-image-gc-rpi4 + labels: + app: node-image-gc-rpi4 + serviceAccountName: node-image-gc-rpi4 + nodeSelector: + hardware: rpi4 + images: + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: DaemonSet + namespace: logging + name: node-image-prune-rpi5 + labels: + app: node-image-prune-rpi5 + serviceAccountName: node-image-prune-rpi5 + nodeSelector: + hardware: rpi5 + images: + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: DaemonSet + namespace: logging + name: node-log-rotation + labels: + app: node-log-rotation + serviceAccountName: node-log-rotation + nodeSelector: + hardware: rpi5 + images: + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 +- kind: Deployment + namespace: logging + name: oauth2-proxy-logs + labels: + app: oauth2-proxy-logs + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 - kind: Deployment namespace: longhorn-system name: oauth2-proxy-longhorn @@ -479,7 +486,7 @@ workloads: nodeSelector: mailu.bstein.dev/vip: 'true' images: - - lachlanevenson/k8s-kubectl:latest + - registry.bstein.dev/bstein/kubectl:1.35.0 - kind: Deployment namespace: mailu-mailserver name: mailu-sync-listener @@ -490,30 +497,24 @@ workloads: images: - python:3.11-alpine - kind: DaemonSet - namespace: metallb-system - name: metallb-speaker + namespace: maintenance + name: node-image-sweeper labels: - app.kubernetes.io/component: speaker - app.kubernetes.io/instance: metallb - app.kubernetes.io/name: metallb - serviceAccountName: metallb-speaker + app: node-image-sweeper + serviceAccountName: node-image-sweeper nodeSelector: kubernetes.io/os: linux images: - - quay.io/frrouting/frr:10.4.1 - - quay.io/metallb/speaker:v0.15.3 -- kind: Deployment - namespace: metallb-system - name: metallb-controller + - python:3.12.9-alpine3.20 +- kind: 
DaemonSet + namespace: maintenance + name: node-nofile labels: - app.kubernetes.io/component: controller - app.kubernetes.io/instance: metallb - app.kubernetes.io/name: metallb - serviceAccountName: metallb-controller - nodeSelector: - kubernetes.io/os: linux + app: node-nofile + serviceAccountName: node-nofile + nodeSelector: {} images: - - quay.io/metallb/controller:v0.15.3 + - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 - kind: DaemonSet namespace: monitoring name: dcgm-exporter @@ -523,6 +524,16 @@ workloads: nodeSelector: {} images: - registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 +- kind: DaemonSet + namespace: monitoring + name: jetson-tegrastats-exporter + labels: + app: jetson-tegrastats-exporter + serviceAccountName: default + nodeSelector: + jetson: 'true' + images: + - python:3.10-slim - kind: Deployment namespace: monitoring name: postmark-exporter @@ -541,7 +552,7 @@ workloads: nodeSelector: hardware: rpi5 images: - - collabora/code:latest + - collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26 - kind: Deployment namespace: nextcloud name: nextcloud @@ -552,6 +563,46 @@ workloads: hardware: rpi5 images: - nextcloud:29-apache +- kind: Deployment + namespace: outline + name: outline + labels: + app: outline + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - outlinewiki/outline:1.2.0 +- kind: Deployment + namespace: outline + name: outline-redis + labels: + app: outline-redis + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - redis:7.4.1-alpine +- kind: Deployment + namespace: planka + name: planka + labels: + app: planka + serviceAccountName: null + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - ghcr.io/plankanban/planka:2.0.0-rc.4 +- kind: StatefulSet + namespace: postgres + name: postgres + labels: + app: postgres + serviceAccountName: 
postgres-vault + nodeSelector: + node-role.kubernetes.io/worker: 'true' + images: + - postgres:15 - kind: Deployment namespace: sso name: keycloak @@ -663,16 +714,6 @@ services: port: 80 targetPort: 8080 protocol: TCP -- namespace: ci-demo - name: ci-demo - type: ClusterIP - selector: - app.kubernetes.io/name: ci-demo - ports: - - name: http - port: 80 - targetPort: http - protocol: TCP - namespace: comms name: coturn type: LoadBalancer @@ -971,64 +1012,6 @@ services: port: 80 targetPort: 80 protocol: TCP -- namespace: comms - name: othrys-element-element-web - type: ClusterIP - selector: - app.kubernetes.io/instance: othrys-element - app.kubernetes.io/name: element-web - ports: - - name: http - port: 80 - targetPort: http - protocol: TCP -- namespace: comms - name: othrys-synapse-matrix-synapse - type: ClusterIP - selector: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - ports: - - name: http - port: 8008 - targetPort: http - protocol: TCP -- namespace: comms - name: othrys-synapse-redis-headless - type: ClusterIP - selector: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - protocol: TCP -- namespace: comms - name: othrys-synapse-redis-master - type: ClusterIP - selector: - app.kubernetes.io/component: master - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - protocol: TCP -- namespace: comms - name: othrys-synapse-replication - type: ClusterIP - selector: - app.kubernetes.io/component: synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: matrix-synapse - ports: - - name: replication - port: 9093 - targetPort: replication - protocol: TCP - namespace: crypto name: monerod type: ClusterIP @@ -1156,6 +1139,16 @@ services: port: 443 targetPort: websecure protocol: TCP +- namespace: 
logging + name: oauth2-proxy-logs + type: ClusterIP + selector: + app: oauth2-proxy-logs + ports: + - name: http + port: 80 + targetPort: 4180 + protocol: TCP - namespace: longhorn-system name: oauth2-proxy-longhorn type: ClusterIP @@ -1208,18 +1201,6 @@ services: port: 8080 targetPort: 8080 protocol: TCP -- namespace: metallb-system - name: metallb-webhook-service - type: ClusterIP - selector: - app.kubernetes.io/component: controller - app.kubernetes.io/instance: metallb - app.kubernetes.io/name: metallb - ports: - - name: null - port: 443 - targetPort: 9443 - protocol: TCP - namespace: monitoring name: dcgm-exporter type: ClusterIP @@ -1230,6 +1211,16 @@ services: port: 9400 targetPort: metrics protocol: TCP +- namespace: monitoring + name: jetson-tegrastats-exporter + type: ClusterIP + selector: + app: jetson-tegrastats-exporter + ports: + - name: metrics + port: 9100 + targetPort: metrics + protocol: TCP - namespace: monitoring name: postmark-exporter type: ClusterIP @@ -1260,6 +1251,46 @@ services: port: 80 targetPort: http protocol: TCP +- namespace: outline + name: outline + type: ClusterIP + selector: + app: outline + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: outline + name: outline-redis + type: ClusterIP + selector: + app: outline-redis + ports: + - name: redis + port: 6379 + targetPort: redis + protocol: TCP +- namespace: planka + name: planka + type: ClusterIP + selector: + app: planka + ports: + - name: http + port: 80 + targetPort: http + protocol: TCP +- namespace: postgres + name: postgres-service + type: ClusterIP + selector: + app: postgres + ports: + - name: postgres + port: 5432 + targetPort: 5432 + protocol: TCP - namespace: sso name: keycloak type: ClusterIP @@ -1391,7 +1422,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-bstein-dev - source: communication + source: comms - host: bstein.dev path: /.well-known/matrix/server backend: @@ -1402,7 +1433,7 @@ http_endpoints: via: kind: Ingress 
name: matrix-wellknown-bstein-dev - source: communication + source: comms - host: bstein.dev path: /api backend: @@ -1428,7 +1459,7 @@ http_endpoints: via: kind: Ingress name: element-call - source: communication + source: comms - host: chat.ai.bstein.dev path: / backend: @@ -1480,7 +1511,7 @@ http_endpoints: via: kind: Ingress name: livekit-jwt-ingress - source: communication + source: comms - host: kit.live.bstein.dev path: /livekit/sfu backend: @@ -1493,20 +1524,7 @@ http_endpoints: via: kind: Ingress name: livekit-ingress - source: communication -- host: live.bstein.dev - path: / - backend: - namespace: comms - service: othrys-element-element-web - port: 80 - workloads: - - kind: Deployment - name: othrys-element-element-web - via: - kind: Ingress - name: othrys-element-element-web - source: communication + source: comms - host: live.bstein.dev path: /.well-known/matrix/client backend: @@ -1517,7 +1535,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown - source: communication + source: comms - host: live.bstein.dev path: /.well-known/matrix/server backend: @@ -1528,20 +1546,31 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown - source: communication + source: comms - host: live.bstein.dev path: /_matrix backend: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: &id002 - - kind: Deployment - name: othrys-synapse-matrix-synapse + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms +- host: logs.bstein.dev + path: / + backend: + namespace: logging + service: oauth2-proxy-logs + port: http + workloads: + - kind: Deployment + name: oauth2-proxy-logs + via: + kind: Ingress + name: logs + source: logging - host: longhorn.bstein.dev path: / backend: @@ -1572,13 +1601,13 @@ http_endpoints: namespace: comms service: matrix-authentication-service port: 8080 - workloads: &id003 + workloads: &id002 - kind: Deployment name: matrix-authentication-service via: kind: Ingress name: 
matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /.well-known/matrix/client backend: @@ -1589,7 +1618,7 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-matrix-live - source: communication + source: comms - host: matrix.live.bstein.dev path: /.well-known/matrix/server backend: @@ -1600,86 +1629,86 @@ http_endpoints: via: kind: Ingress name: matrix-wellknown-matrix-live - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix backend: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id002 + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/r0/register backend: namespace: comms service: matrix-guest-register port: 8080 - workloads: &id004 + workloads: &id003 - kind: Deployment name: matrix-guest-register via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/login backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/logout backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/refresh backend: namespace: comms service: matrix-authentication-service port: 8080 - workloads: *id003 + workloads: *id002 via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: matrix.live.bstein.dev path: /_matrix/client/v3/register backend: namespace: comms service: matrix-guest-register port: 8080 - workloads: *id004 + workloads: *id003 via: kind: Ingress name: matrix-routing - 
source: communication + source: comms - host: matrix.live.bstein.dev path: /_synapse backend: namespace: comms service: othrys-synapse-matrix-synapse port: 8008 - workloads: *id002 + workloads: [] via: kind: Ingress name: matrix-routing - source: communication + source: comms - host: monero.bstein.dev path: / backend: @@ -1693,6 +1722,19 @@ http_endpoints: kind: Ingress name: monerod source: monerod +- host: notes.bstein.dev + path: / + backend: + namespace: outline + service: outline + port: 80 + workloads: + - kind: Deployment + name: outline + via: + kind: Ingress + name: outline + source: outline - host: office.bstein.dev path: / backend: @@ -1771,6 +1813,19 @@ http_endpoints: kind: Ingress name: jellyfin source: jellyfin +- host: tasks.bstein.dev + path: / + backend: + namespace: planka + service: planka + port: 80 + workloads: + - kind: Deployment + name: planka + via: + kind: Ingress + name: planka + source: planka - host: vault.bstein.dev path: / backend: @@ -1785,15 +1840,28 @@ http_endpoints: name: vaultwarden-ingress source: vaultwarden helmrelease_host_hints: + comms:comms/othrys-element: + - call.live.bstein.dev + - live.bstein.dev + - matrix.live.bstein.dev + comms:comms/othrys-synapse: + - bstein.dev + - kit.live.bstein.dev + - live.bstein.dev + - matrix.live.bstein.dev + - turn.live.bstein.dev gitops-ui:flux-system/weave-gitops: - cd.bstein.dev harbor:harbor/harbor: - registry.bstein.dev + logging:logging/data-prepper: + - registry.bstein.dev mailu:mailu-mailserver/mailu: - bstein.dev - mail.bstein.dev monitoring:monitoring/alertmanager: - alerts.bstein.dev monitoring:monitoring/grafana: + - bstein.dev - metrics.bstein.dev - sso.bstein.dev diff --git a/services/comms/knowledge/diagrams/atlas-http.mmd b/services/comms/knowledge/diagrams/atlas-http.mmd index ddd33d8..ab7c362 100644 --- a/services/comms/knowledge/diagrams/atlas-http.mmd +++ b/services/comms/knowledge/diagrams/atlas-http.mmd @@ -47,15 +47,14 @@ flowchart LR 
wl_comms_livekit["comms/livekit (Deployment)"] svc_comms_livekit --> wl_comms_livekit host_live_bstein_dev["live.bstein.dev"] - svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"] - host_live_bstein_dev --> svc_comms_othrys_element_element_web - wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"] - svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web host_live_bstein_dev --> svc_comms_matrix_wellknown svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] - svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse + host_logs_bstein_dev["logs.bstein.dev"] + svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"] + host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs + wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"] + svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs host_longhorn_bstein_dev["longhorn.bstein.dev"] svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn @@ -80,6 +79,11 @@ flowchart LR host_monero_bstein_dev --> svc_crypto_monerod wl_crypto_monerod["crypto/monerod (Deployment)"] svc_crypto_monerod --> wl_crypto_monerod + host_notes_bstein_dev["notes.bstein.dev"] + svc_outline_outline["outline/outline (Service)"] + host_notes_bstein_dev --> svc_outline_outline + wl_outline_outline["outline/outline (Deployment)"] + svc_outline_outline --> wl_outline_outline host_office_bstein_dev["office.bstein.dev"] svc_nextcloud_collabora["nextcloud/collabora (Service)"] host_office_bstein_dev --> svc_nextcloud_collabora @@ -110,6 +114,11 @@ flowchart LR host_stream_bstein_dev --> svc_jellyfin_jellyfin 
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"] svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin + host_tasks_bstein_dev["tasks.bstein.dev"] + svc_planka_planka["planka/planka (Service)"] + host_tasks_bstein_dev --> svc_planka_planka + wl_planka_planka["planka/planka (Deployment)"] + svc_planka_planka --> wl_planka_planka host_vault_bstein_dev["vault.bstein.dev"] svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"] host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service @@ -133,10 +142,7 @@ flowchart LR wl_comms_livekit_token_service svc_comms_livekit wl_comms_livekit - svc_comms_othrys_element_element_web - wl_comms_othrys_element_element_web svc_comms_othrys_synapse_matrix_synapse - wl_comms_othrys_synapse_matrix_synapse svc_comms_matrix_authentication_service wl_comms_matrix_authentication_service svc_comms_matrix_guest_register @@ -160,6 +166,10 @@ flowchart LR svc_jenkins_jenkins wl_jenkins_jenkins end + subgraph logging[logging] + svc_logging_oauth2_proxy_logs + wl_logging_oauth2_proxy_logs + end subgraph longhorn_system[longhorn-system] svc_longhorn_system_oauth2_proxy_longhorn wl_longhorn_system_oauth2_proxy_longhorn @@ -173,6 +183,14 @@ flowchart LR svc_nextcloud_collabora wl_nextcloud_collabora end + subgraph outline[outline] + svc_outline_outline + wl_outline_outline + end + subgraph planka[planka] + svc_planka_planka + wl_planka_planka + end subgraph sso[sso] svc_sso_oauth2_proxy wl_sso_oauth2_proxy diff --git a/services/comms/kustomization.yaml b/services/comms/kustomization.yaml index 2008843..3360067 100644 --- a/services/comms/kustomization.yaml +++ b/services/comms/kustomization.yaml @@ -4,8 +4,10 @@ kind: Kustomization namespace: comms resources: - namespace.yaml + - serviceaccount.yaml + - secretproviderclass.yaml - mas-configmap.yaml - - element-rendered.yaml + - helmrelease.yaml - livekit-config.yaml - element-call-config.yaml - element-call-deployment.yaml @@ -17,6 +19,8 @@ resources: - 
mas-secrets-ensure-rbac.yaml - comms-secrets-ensure-rbac.yaml - mas-db-ensure-rbac.yaml + - synapse-signingkey-ensure-rbac.yaml + - vault-sync-deployment.yaml - mas-admin-client-secret-ensure-job.yaml - mas-db-ensure-job.yaml - comms-secrets-ensure-job.yaml @@ -24,7 +28,6 @@ resources: - synapse-seeder-admin-ensure-job.yaml - synapse-user-seed-job.yaml - mas-local-users-ensure-job.yaml - - synapse-rendered.yaml - mas-deployment.yaml - livekit-token-deployment.yaml - livekit.yaml @@ -39,10 +42,17 @@ resources: - livekit-middlewares.yaml - matrix-ingress.yaml -patches: - - path: synapse-deployment-strategy-patch.yaml - configMapGenerator: + - name: comms-vault-env + files: + - comms_vault_env.sh=scripts/comms_vault_env.sh + options: + disableNameSuffixHash: true + - name: comms-vault-entrypoint + files: + - scripts/vault-entrypoint.sh + options: + disableNameSuffixHash: true - name: matrix-guest-register files: - server.py=scripts/guest-register/server.py @@ -53,24 +63,9 @@ configMapGenerator: - bot.py=scripts/atlasbot/bot.py options: disableNameSuffixHash: true - - name: othrys-synapse-redis-health + - name: othrys-element-host-config files: - - ping_readiness_local.sh=scripts/synapse/redis/ping_readiness_local.sh - - ping_liveness_local.sh=scripts/synapse/redis/ping_liveness_local.sh - - ping_readiness_master.sh=scripts/synapse/redis/ping_readiness_master.sh - - ping_liveness_master.sh=scripts/synapse/redis/ping_liveness_master.sh - - ping_readiness_local_and_master.sh=scripts/synapse/redis/ping_readiness_local_and_master.sh - - ping_liveness_local_and_master.sh=scripts/synapse/redis/ping_liveness_local_and_master.sh - options: - disableNameSuffixHash: true - - name: othrys-synapse-redis-scripts - files: - - start-master.sh=scripts/synapse/redis/start-master.sh - options: - disableNameSuffixHash: true - - name: othrys-synapse-matrix-synapse-scripts - files: - - signing-key.sh=scripts/synapse/signing-key.sh + - 20-host-config.sh=scripts/element-host-config.sh 
options: disableNameSuffixHash: true - name: atlas-kb diff --git a/services/comms/livekit-token-deployment.yaml b/services/comms/livekit-token-deployment.yaml index 1b4cdca..a23cf62 100644 --- a/services/comms/livekit-token-deployment.yaml +++ b/services/comms/livekit-token-deployment.yaml @@ -14,7 +14,24 @@ spec: metadata: labels: app: livekit-token-service + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-livekit-env: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-env: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}} + export LIVEKIT_SECRET="{{ .Data.data.primary }}" + {{- end -}} spec: + serviceAccountName: comms-vault + imagePullSecrets: + - name: harbor-regcred + hostAliases: + - ip: "10.43.6.87" + hostnames: + - live.bstein.dev + - matrix.live.bstein.dev + - kit.live.bstein.dev nodeSelector: hardware: rpi5 affinity: @@ -26,23 +43,16 @@ spec: - key: hardware operator: In values: ["rpi5","rpi4"] - hostAliases: - - ip: 10.43.60.6 - hostnames: - - live.bstein.dev containers: - name: token-service - image: ghcr.io/element-hq/lk-jwt-service:0.3.0 + image: registry.bstein.dev/tools/lk-jwt-service-vault:0.3.0 env: - name: LIVEKIT_URL value: wss://kit.live.bstein.dev/livekit/sfu - name: LIVEKIT_KEY value: primary - - name: LIVEKIT_SECRET - valueFrom: - secretKeyRef: - name: livekit-api - key: primary + - name: VAULT_ENV_FILE + value: /vault/secrets/livekit-env - name: LIVEKIT_FULL_ACCESS_HOMESERVERS value: live.bstein.dev ports: @@ -55,6 +65,7 @@ spec: limits: cpu: 300m memory: 256Mi + volumes: --- apiVersion: v1 kind: Service diff --git a/services/comms/livekit.yaml b/services/comms/livekit.yaml index 46d57f8..4a5eb8d 100644 --- a/services/comms/livekit.yaml +++ b/services/comms/livekit.yaml @@ -13,10 +13,47 @@ spec: template: metadata: annotations: - checksum/config: livekit-config-v5 + checksum/config: livekit-config-v6 + 
vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret 
}}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} labels: app: livekit spec: + serviceAccountName: comms-vault enableServiceLinks: false nodeSelector: hardware: rpi5 @@ -36,16 +73,11 @@ spec: args: - | set -euo pipefail + . /vault/scripts/comms_vault_env.sh umask 077 TURN_PASSWORD_ESCAPED="$(printf '%s' "${TURN_PASSWORD}" | sed 's/[\\/&]/\\&/g')" sed "s/@@TURN_PASSWORD@@/${TURN_PASSWORD_ESCAPED}/g" /etc/livekit-template/livekit.yaml > /etc/livekit/livekit.yaml chmod 0644 /etc/livekit/livekit.yaml - env: - - name: TURN_PASSWORD - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET volumeMounts: - name: config-template mountPath: /etc/livekit-template @@ -53,6 +85,9 @@ spec: - name: config mountPath: /etc/livekit readOnly: false + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true containers: - name: livekit image: livekit/livekit-server:v1.9.0 @@ -61,6 +96,7 @@ spec: - -c - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh umask 077 printf "%s: %s\n" "${LIVEKIT_API_KEY_ID}" "${LIVEKIT_API_SECRET}" > /var/run/livekit/keys chmod 600 /var/run/livekit/keys @@ -68,11 +104,6 @@ spec: env: - name: LIVEKIT_API_KEY_ID value: primary - - name: LIVEKIT_API_SECRET - valueFrom: - secretKeyRef: - name: livekit-api - key: primary ports: - containerPort: 7880 name: http @@ -92,6 +123,9 @@ spec: readOnly: true - name: runtime-keys mountPath: /var/run/livekit + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true resources: requests: cpu: 500m @@ -110,6 +144,10 @@ spec: emptyDir: {} - name: runtime-keys emptyDir: {} + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 --- apiVersion: v1 kind: Service diff --git a/services/comms/mas-admin-client-secret-ensure-job.yaml b/services/comms/mas-admin-client-secret-ensure-job.yaml index 3843877..7b05cca 100644 --- a/services/comms/mas-admin-client-secret-ensure-job.yaml +++ b/services/comms/mas-admin-client-secret-ensure-job.yaml @@ -4,6 +4,8 @@ kind: ServiceAccount metadata: name: mas-admin-client-secret-writer namespace: comms +imagePullSecrets: + - name: harbor-regcred --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -36,7 +38,7 @@ subjects: apiVersion: batch/v1 kind: Job metadata: - name: mas-admin-client-secret-ensure-7 + name: mas-admin-client-secret-ensure-11 namespace: comms spec: backoffLimit: 2 @@ -44,6 +46,20 @@ spec: spec: serviceAccountName: mas-admin-client-secret-writer restartPolicy: OnFailure + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] volumes: - name: work emptyDir: {} @@ -62,23 +78,32 @@ spec: mountPath: /work containers: - name: patch - image: 
bitnami/kubectl:latest + image: registry.bstein.dev/bstein/kubectl:1.35.0 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - if kubectl -n comms get secret mas-admin-client-runtime >/dev/null 2>&1; then - if kubectl -n comms get secret mas-admin-client-runtime -o jsonpath='{.data.client_secret}' 2>/dev/null | grep -q .; then - exit 0 - fi - else - kubectl -n comms create secret generic mas-admin-client-runtime \ - --from-file=client_secret=/work/client_secret >/dev/null + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-comms-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 + fi + + current="$(curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/comms/mas-admin-client-runtime" | jq -r '.data.data.client_secret // empty')" + if [ -n "${current}" ]; then exit 0 fi - secret_b64="$(base64 /work/client_secret | tr -d '\n')" - payload="$(printf '{"data":{"client_secret":"%s"}}' "${secret_b64}")" - kubectl -n comms patch secret mas-admin-client-runtime --type=merge -p "${payload}" >/dev/null + + value="$(cat /work/client_secret)" + payload="$(jq -nc --arg value "${value}" '{data:{client_secret:$value}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/comms/mas-admin-client-runtime" >/dev/null volumeMounts: - name: work mountPath: /work diff --git a/services/comms/mas-configmap.yaml b/services/comms/mas-configmap.yaml index a41ebeb..5e6cfdd 100644 --- a/services/comms/mas-configmap.yaml +++ b/services/comms/mas-configmap.yaml @@ -31,13 +31,13 @@ data: clients: - 
client_id: 01KDXMVQBQ5JNY6SEJPZW6Z8BM client_auth_method: client_secret_basic - client_secret_file: /etc/mas/admin-client/client_secret + client_secret_file: /vault/secrets/mas-admin-secret secrets: - encryption_file: /etc/mas/secrets/encryption + encryption_file: /vault/secrets/mas-encryption keys: - kid: "othrys-rsa-1" - key_file: /etc/mas/keys/rsa_key + key_file: /vault/secrets/mas-rsa-key passwords: enabled: true diff --git a/services/comms/mas-db-ensure-job.yaml b/services/comms/mas-db-ensure-job.yaml index 1c8b5c4..56707a9 100644 --- a/services/comms/mas-db-ensure-job.yaml +++ b/services/comms/mas-db-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-db-ensure-16 + name: mas-db-ensure-22 namespace: comms spec: backoffLimit: 1 @@ -11,9 +11,23 @@ spec: spec: serviceAccountName: mas-db-ensure restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] containers: - name: ensure - image: bitnami/kubectl:latest + image: registry.bstein.dev/bstein/kubectl:1.35.0 command: ["/bin/sh", "-c"] args: - | @@ -24,18 +38,33 @@ spec: head -c 32 /dev/urandom | base64 | tr -d '\n' | tr '+/' '-_' | tr -d '=' } - EXISTING_B64="$(kubectl -n comms get secret mas-db -o jsonpath='{.data.password}' 2>/dev/null || true)" - if [ -n "${EXISTING_B64}" ]; then - MAS_PASS="$(printf '%s' "${EXISTING_B64}" | base64 -d)" - if printf '%s' "${MAS_PASS}" | grep -Eq '[^A-Za-z0-9_-]'; then - MAS_PASS="$(safe_pass)" - MAS_B64="$(printf '%s' "${MAS_PASS}" | base64 | tr -d '\n')" - payload="$(printf '{"data":{"password":"%s"}}' "${MAS_B64}")" - kubectl -n comms patch secret mas-db --type=merge -p "${payload}" >/dev/null - fi - else + 
vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-comms-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 + fi + + vault_read() { + curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/comms/mas-db" | jq -r '.data.data.password // empty' + } + + vault_write() { + value="$1" + payload="$(jq -nc --arg value "${value}" '{data:{password:$value}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/comms/mas-db" >/dev/null + } + + MAS_PASS="$(vault_read)" + if [ -z "${MAS_PASS}" ] || printf '%s' "${MAS_PASS}" | grep -Eq '[^A-Za-z0-9_-]'; then MAS_PASS="$(safe_pass)" - kubectl -n comms create secret generic mas-db --from-literal=password="${MAS_PASS}" >/dev/null + vault_write "${MAS_PASS}" fi POD_NAME="$(kubectl -n postgres get pods -l app=postgres -o jsonpath='{.items[0].metadata.name}')" diff --git a/services/comms/mas-db-ensure-rbac.yaml b/services/comms/mas-db-ensure-rbac.yaml index 19691d7..c8093b5 100644 --- a/services/comms/mas-db-ensure-rbac.yaml +++ b/services/comms/mas-db-ensure-rbac.yaml @@ -4,6 +4,8 @@ kind: ServiceAccount metadata: name: mas-db-ensure namespace: comms +imagePullSecrets: + - name: harbor-regcred --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/services/comms/mas-deployment.yaml b/services/comms/mas-deployment.yaml index 2117c17..e5387e2 100644 --- a/services/comms/mas-deployment.yaml +++ b/services/comms/mas-deployment.yaml @@ -13,11 +13,59 @@ spec: template: metadata: annotations: - checksum/config: v5-adminapi-7 + 
checksum/config: v5-adminapi-9 + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" 
-}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-encryption: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-encryption: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.encryption }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-rsa-key: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-rsa-key: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.rsa_key }}{{- end -}} labels: app: matrix-authentication-service spec: enableServiceLinks: false + serviceAccountName: comms-vault + hostAliases: + - ip: "10.43.216.45" + hostnames: + - "othrys-synapse-matrix-synapse" + - "othrys-synapse-matrix-synapse.comms.svc.cluster.local" nodeSelector: hardware: rpi5 affinity: @@ -36,6 +84,7 @@ spec: args: - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh umask 077 DB_PASS_ESCAPED="$(printf '%s' "${MAS_DB_PASSWORD}" | sed 's/[\\/&]/\\&/g')" MATRIX_SECRET_ESCAPED="$(printf '%s' "${MATRIX_SHARED_SECRET}" | sed 's/[\\/&]/\\&/g')" @@ -47,22 +96,6 @@ spec: -e "s/@@KEYCLOAK_CLIENT_SECRET@@/${KC_SECRET_ESCAPED}/g" \ /etc/mas/config.yaml > /rendered/config.yaml chmod 0644 /rendered/config.yaml - env: - - name: MAS_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mas-db - key: password - - name: MATRIX_SHARED_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: matrix_shared_secret - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: keycloak_client_secret volumeMounts: - name: config mountPath: /etc/mas/config.yaml @@ -71,6 +104,9 @@ spec: - name: rendered mountPath: /rendered readOnly: false + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true containers: - name: mas image: ghcr.io/element-hq/matrix-authentication-service:1.8.0 @@ -86,15 +122,6 @@ spec: - name: rendered mountPath: /rendered readOnly: true - - name: secrets - mountPath: /etc/mas/secrets - readOnly: true - - name: admin-client - mountPath: /etc/mas/admin-client - readOnly: true - - name: keys - mountPath: /etc/mas/keys - readOnly: true resources: requests: cpu: 200m @@ -111,28 +138,10 @@ spec: path: config.yaml - name: rendered emptyDir: {} - - name: secrets - secret: - secretName: mas-secrets-runtime - items: - - key: encryption - path: encryption - - key: matrix_shared_secret - path: matrix_shared_secret - - key: keycloak_client_secret - path: keycloak_client_secret - - name: keys - secret: - secretName: mas-secrets-runtime - items: - - key: rsa_key - path: rsa_key - - name: admin-client - secret: - secretName: mas-admin-client-runtime - items: - - key: client_secret - path: client_secret + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 --- apiVersion: v1 kind: Service diff --git 
a/services/comms/mas-local-users-ensure-job.yaml b/services/comms/mas-local-users-ensure-job.yaml index e462426..5802009 100644 --- a/services/comms/mas-local-users-ensure-job.yaml +++ b/services/comms/mas-local-users-ensure-job.yaml @@ -2,56 +2,98 @@ apiVersion: batch/v1 kind: Job metadata: - name: mas-local-users-ensure-5 + name: mas-local-users-ensure-15 namespace: comms spec: backoffLimit: 1 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: 
"kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: comms-vault volumes: - - name: mas-admin-client - secret: - secretName: 
mas-admin-client-runtime - items: - - key: client_secret - path: client_secret + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 containers: - name: ensure image: python:3.11-slim volumeMounts: - - name: mas-admin-client - mountPath: /etc/mas-admin-client + - name: vault-scripts + mountPath: /vault/scripts readOnly: true env: - name: MAS_ADMIN_CLIENT_ID value: 01KDXMVQBQ5JNY6SEJPZW6Z8BM - name: MAS_ADMIN_CLIENT_SECRET_FILE - value: /etc/mas-admin-client/client_secret + value: /vault/secrets/mas-admin-secret - name: MAS_TOKEN_URL value: http://matrix-authentication-service:8080/oauth2/token - name: MAS_ADMIN_API_BASE value: http://matrix-authentication-service:8081/api/admin/v1 - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - name: BOT_USER value: atlasbot - - name: BOT_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: bot-password command: - /bin/sh - -c - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh pip install --no-cache-dir requests >/dev/null python - <<'PY' import base64 @@ -67,6 +109,27 @@ spec: AUTH_BASE = "http://matrix-authentication-service:8080" SERVER_NAME = "live.bstein.dev" + def wait_for_service(url): + last = None + for attempt in range(1, 11): + try: + requests.get(url, timeout=10) + return + except Exception as exc: # noqa: BLE001 + last = exc + time.sleep(attempt * 2) + raise RuntimeError(f"MAS service not reachable: {last}") + + def request_with_retry(method, url, attempts=6, **kwargs): + last = None + for attempt in range(1, attempts + 1): + try: + return requests.request(method, url, **kwargs) + except requests.RequestException as exc: + last = exc + time.sleep(attempt * 2) + raise RuntimeError(f"request failed for {url}: {last}") + def admin_token(): with open(MAS_ADMIN_CLIENT_SECRET_FILE, "r", encoding="utf-8") as f: secret = f.read().strip() @@ -88,7 +151,8 @@ spec: raise RuntimeError(f"MAS admin token request failed: {last}") def get_user(token, username): - r = requests.get( + r = request_with_retry( + "GET", f"{MAS_ADMIN_API_BASE}/users/by-username/{urllib.parse.quote(username)}", headers={"Authorization": f"Bearer {token}"}, timeout=30, @@ -112,7 +176,8 @@ spec: {"username": username, "password": password}, ] for payload in payloads: - r = requests.post( + r = request_with_retry( + "POST", f"{MAS_ADMIN_API_BASE}/users", headers={"Authorization": f"Bearer {token}"}, json=payload, @@ -125,7 +190,8 @@ spec: return None def update_password(token, user_id, password): - r = requests.post( + r = request_with_retry( + "POST", f"{MAS_ADMIN_API_BASE}/users/{urllib.parse.quote(user_id)}/set-password", headers={"Authorization": f"Bearer {token}"}, json={"password": password}, @@ -144,7 +210,8 @@ spec: login_name = username if not login_name.startswith("@"): login_name = f"@{login_name}:{SERVER_NAME}" - r = requests.post( + r = request_with_retry( + "POST", f"{AUTH_BASE}/_matrix/client/v3/login", json={ "type": 
"m.login.password", @@ -156,6 +223,7 @@ spec: if r.status_code != 200: raise RuntimeError(f"login failed for {username}: {r.status_code} {r.text}") + wait_for_service(MAS_ADMIN_API_BASE) token = admin_token() ensure_user(token, os.environ["SEEDER_USER"], os.environ["SEEDER_PASS"]) ensure_user(token, os.environ["BOT_USER"], os.environ["BOT_PASS"]) diff --git a/services/comms/matrix-ingress.yaml b/services/comms/matrix-ingress.yaml index caaa593..cf3d198 100644 --- a/services/comms/matrix-ingress.yaml +++ b/services/comms/matrix-ingress.yaml @@ -81,6 +81,41 @@ spec: - host: live.bstein.dev http: paths: + - path: /_matrix/client/v3/register + pathType: Prefix + backend: + service: + name: matrix-guest-register + port: + number: 8080 + - path: /_matrix/client/r0/register + pathType: Prefix + backend: + service: + name: matrix-guest-register + port: + number: 8080 + - path: /_matrix/client/v3/login + pathType: Prefix + backend: + service: + name: matrix-authentication-service + port: + number: 8080 + - path: /_matrix/client/v3/logout + pathType: Exact + backend: + service: + name: matrix-authentication-service + port: + number: 8080 + - path: /_matrix/client/v3/refresh + pathType: Exact + backend: + service: + name: matrix-authentication-service + port: + number: 8080 - path: /_matrix pathType: Prefix backend: diff --git a/services/comms/othrys-kick-numeric-job.yaml b/services/comms/othrys-kick-numeric-job.yaml index 8f02bbb..0d3914a 100644 --- a/services/comms/othrys-kick-numeric-job.yaml +++ b/services/comms/othrys-kick-numeric-job.yaml @@ -2,13 +2,66 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-kick-numeric-1 + name: othrys-kick-numeric-8 namespace: comms spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + 
vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" 
-}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: comms-vault containers: - name: kick image: python:3.11-slim @@ -23,16 +76,12 @@ spec: value: "#othrys:live.bstein.dev" - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password command: - /bin/sh - -c - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh pip install --no-cache-dir requests >/dev/null python - <<'PY' import os @@ -58,6 +107,17 @@ spec: def auth(token): return {"Authorization": f"Bearer {token}"} + def wait_for_service(url): + last = None + for attempt in range(1, 11): + try: + requests.get(url, timeout=10) + return + except Exception as exc: # noqa: BLE001 + last = exc + time.sleep(attempt * 2) + raise SystemExit(f"MAS service not reachable: {last}") + def login(user, password): r = requests.post( f"{AUTH_BASE}/_matrix/client/v3/login", @@ -105,6 +165,7 @@ spec: if r.status_code not in (200, 202): raise SystemExit(f"kick {user_id} failed: {r.status_code} {r.text}") + wait_for_service(f"{AUTH_BASE}/_matrix/client/versions") token = login(SEEDER_USER, SEEDER_PASS) room_id = resolve_alias(token, ROOM_ALIAS) for user_id in list_members(token, room_id): @@ -113,3 +174,12 @@ spec: if is_numeric(user_id): kick(token, room_id, user_id) PY + volumeMounts: + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true + volumes: + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/pin-othrys-job.yaml b/services/comms/pin-othrys-job.yaml index 3639194..2b29ca3 100644 --- a/services/comms/pin-othrys-job.yaml +++ b/services/comms/pin-othrys-job.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: pin-othrys-invite namespace: comms + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "*/30 * * * *" suspend: true @@ -14,8 +16,47 @@ spec: spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + 
vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + 
vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + serviceAccountName: comms-vault containers: - name: pin image: python:3.11-slim @@ -26,16 +67,12 @@ spec: value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password command: - /bin/sh - -c - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh pip install --no-cache-dir requests >/dev/null python - <<'PY' import os, requests, urllib.parse @@ -121,3 +158,12 @@ spec: eid = send(room_id, token, MESSAGE) pin(room_id, token, eid) PY + volumeMounts: + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true + volumes: + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/reset-othrys-room-job.yaml b/services/comms/reset-othrys-room-job.yaml index dd056c3..ae8585a 100644 --- a/services/comms/reset-othrys-room-job.yaml +++ b/services/comms/reset-othrys-room-job.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: othrys-room-reset namespace: comms + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "0 0 1 1 *" suspend: true @@ -14,8 +16,47 @@ spec: spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret 
"kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + serviceAccountName: 
comms-vault containers: - name: reset image: python:3.11-slim @@ -34,11 +75,6 @@ spec: value: "Invite guests: share https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join and choose 'Continue' -> 'Join as guest'." - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - name: BOT_USER value: atlasbot command: @@ -46,6 +82,7 @@ spec: - -c - | set -euo pipefail + . /vault/scripts/comms_vault_env.sh pip install --no-cache-dir requests >/dev/null python - <<'PY' import os @@ -264,3 +301,12 @@ spec: print(f"old_room_id={old_room_id}") print(f"new_room_id={new_room_id}") PY + volumeMounts: + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true + volumes: + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/scripts/comms_vault_env.sh b/services/comms/scripts/comms_vault_env.sh new file mode 100644 index 0000000..72319bc --- /dev/null +++ b/services/comms/scripts/comms_vault_env.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env sh +set -eu + +vault_dir="/vault/secrets" + +read_secret() { + tr -d '\r\n' < "${vault_dir}/$1" +} + +export TURN_STATIC_AUTH_SECRET="$(read_secret turn-secret)" +export TURN_PASSWORD="${TURN_STATIC_AUTH_SECRET}" + +export LIVEKIT_API_SECRET="$(read_secret livekit-primary)" +export LIVEKIT_SECRET="${LIVEKIT_API_SECRET}" + +export BOT_PASS="$(read_secret bot-pass)" +export SEEDER_PASS="$(read_secret seeder-pass)" + +export CHAT_API_KEY="$(read_secret chat-matrix)" +export CHAT_API_HOMEPAGE="$(read_secret chat-homepage)" + +export MAS_ADMIN_CLIENT_SECRET_FILE="${vault_dir}/mas-admin-secret" +export PGPASSWORD="$(read_secret synapse-db-pass)" + +export MAS_DB_PASSWORD="$(read_secret mas-db-pass)" +export MATRIX_SHARED_SECRET="$(read_secret mas-matrix-shared)" +export KEYCLOAK_CLIENT_SECRET="$(read_secret mas-kc-secret)" diff --git a/services/comms/scripts/element-host-config.sh 
b/services/comms/scripts/element-host-config.sh new file mode 100644 index 0000000..1c6a488 --- /dev/null +++ b/services/comms/scripts/element-host-config.sh @@ -0,0 +1,9 @@ +#!/bin/sh +set -eu + +HOST_CONFIG="/tmp/element-web-config/config.live.bstein.dev.json" +BASE_CONFIG="/tmp/element-web-config/config.json" + +if [ -f "$BASE_CONFIG" ]; then + cp -f "$BASE_CONFIG" "$HOST_CONFIG" +fi diff --git a/services/comms/scripts/guest-register/server.py b/services/comms/scripts/guest-register/server.py index 0e1fb4c..b1f6490 100644 --- a/services/comms/scripts/guest-register/server.py +++ b/services/comms/scripts/guest-register/server.py @@ -3,6 +3,7 @@ import json import os import random import secrets +import time from http.server import BaseHTTPRequestHandler, HTTPServer from urllib import error, parse, request @@ -29,6 +30,20 @@ NOUN = [ "pine","quartz","reef","ridge","sable","sage","shore","thunder","vale","zephyr", ] +def _open_with_retry(req, timeout, attempts=6): + last = None + for attempt in range(1, attempts + 1): + try: + return request.urlopen(req, timeout=timeout) + except error.HTTPError as e: + return e + except (error.URLError, TimeoutError, OSError) as e: + last = e + time.sleep(attempt * 2) + if last: + raise last + raise RuntimeError("request_failed") + def _json(method, url, *, headers=None, body=None, timeout=20): hdrs = {"Content-Type": "application/json"} if headers: @@ -37,18 +52,17 @@ def _json(method, url, *, headers=None, body=None, timeout=20): if body is not None: data = json.dumps(body).encode() req = request.Request(url, data=data, headers=hdrs, method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() + resp = _open_with_retry(req, timeout) + if isinstance(resp, error.HTTPError): + raw = resp.read() try: payload = json.loads(raw.decode()) if raw else {} except Exception: 
payload = {} - return e.code, payload + return resp.code, payload + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload def _form(method, url, *, headers=None, fields=None, timeout=20): hdrs = {"Content-Type": "application/x-www-form-urlencoded"} @@ -56,18 +70,17 @@ def _form(method, url, *, headers=None, fields=None, timeout=20): hdrs.update(headers) data = parse.urlencode(fields or {}).encode() req = request.Request(url, data=data, headers=hdrs, method=method) - try: - with request.urlopen(req, timeout=timeout) as resp: - raw = resp.read() - payload = json.loads(raw.decode()) if raw else {} - return resp.status, payload - except error.HTTPError as e: - raw = e.read() + resp = _open_with_retry(req, timeout) + if isinstance(resp, error.HTTPError): + raw = resp.read() try: payload = json.loads(raw.decode()) if raw else {} except Exception: payload = {} - return e.code, payload + return resp.code, payload + raw = resp.read() + payload = json.loads(raw.decode()) if raw else {} + return resp.status, payload _admin_token = None _admin_token_at = 0.0 @@ -110,12 +123,28 @@ def _admin_api(admin_token, method, path, body=None): timeout=20, ) -def _create_user(admin_token, username): - status, payload = _admin_api(admin_token, "POST", "/users", {"username": username}) - if status != 201: - return status, None - user = payload.get("data") or {} - return status, user.get("id") +def _create_user(admin_token, username, password): + payloads = [ + { + "data": { + "type": "user", + "attributes": { + "username": username, + "password": password, + }, + } + }, + {"username": username, "password": password}, + {"username": username}, + ] + for payload in payloads: + status, body = _admin_api(admin_token, "POST", "/users", payload) + if status in (200, 201): + user = body.get("data") or {} + return status, user.get("id") + if status == 409: + return status, None + return status, None def _set_password(admin_token, user_id, password): 
status, _payload = _admin_api( @@ -127,20 +156,28 @@ def _set_password(admin_token, user_id, password): return status in (200, 204) def _login_password(username, password): - payload = { - "type": "m.login.password", - "identifier": {"type": "m.id.user", "user": f"@{username}:{SERVER_NAME}"}, - "password": password, - } - status, data = _json( - "POST", - f"{MAS_BASE}/_matrix/client/v3/login", - body=payload, - timeout=20, - ) - if status != 200: - return None, None - return data.get("access_token"), data.get("device_id") + payloads = [ + { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": f"@{username}:{SERVER_NAME}"}, + "password": password, + }, + { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": username}, + "password": password, + }, + ] + for payload in payloads: + status, data = _json( + "POST", + f"{MAS_BASE}/_matrix/client/v3/login", + body=payload, + timeout=20, + ) + if status == 200: + return data.get("access_token"), data.get("device_id") + return None, None def _set_display_name(access_token, user_id, displayname): _json( @@ -224,18 +261,18 @@ class Handler(BaseHTTPRequestHandler): admin_token = _mas_admin_access_token(now) displayname = _generate_displayname() + password = secrets.token_urlsafe(18) localpart = None mas_user_id = None for _ in range(5): localpart = _generate_localpart() - status, mas_user_id = _create_user(admin_token, localpart) - if status == 201 and mas_user_id: + status, mas_user_id = _create_user(admin_token, localpart, password) + if status in (200, 201) and mas_user_id: break mas_user_id = None if not mas_user_id or not localpart: raise RuntimeError("add_user_failed") - password = secrets.token_urlsafe(18) if not _set_password(admin_token, mas_user_id, password): raise RuntimeError("set_password_failed") access_token, device_id = _login_password(localpart, password) @@ -258,7 +295,11 @@ class Handler(BaseHTTPRequestHandler): def main(): port = int(os.environ.get("PORT", 
"8080")) - HTTPServer(("0.0.0.0", port), Handler).serve_forever() + try: + from http.server import ThreadingHTTPServer as _Server + except Exception: + _Server = HTTPServer + _Server(("0.0.0.0", port), Handler).serve_forever() if __name__ == "__main__": main() diff --git a/services/comms/scripts/vault-entrypoint.sh b/services/comms/scripts/vault-entrypoint.sh new file mode 100644 index 0000000..fa3b791 --- /dev/null +++ b/services/comms/scripts/vault-entrypoint.sh @@ -0,0 +1,34 @@ +#!/bin/sh +set -eu + +if [ -n "${VAULT_ENV_FILE:-}" ]; then + if [ -f "${VAULT_ENV_FILE}" ]; then + # shellcheck disable=SC1090 + . "${VAULT_ENV_FILE}" + else + echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/comms/secretproviderclass.yaml b/services/comms/secretproviderclass.yaml new file mode 100644 index 0000000..69d4b2b --- /dev/null +++ b/services/comms/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/comms/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: comms-vault + namespace: comms +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "comms" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/comms" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git 
a/services/comms/seed-othrys-room.yaml b/services/comms/seed-othrys-room.yaml index 901f14d..804d330 100644 --- a/services/comms/seed-othrys-room.yaml +++ b/services/comms/seed-othrys-room.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: seed-othrys-room namespace: comms + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "*/10 * * * *" suspend: true @@ -12,8 +14,47 @@ spec: spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: 
"kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + serviceAccountName: comms-vault containers: - name: seed image: python:3.11-slim @@ -24,23 +65,14 @@ spec: value: http://matrix-authentication-service:8080 - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - name: BOT_USER value: atlasbot - - name: BOT_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: 
bot-password command: - /bin/sh - -c - | set -euo pipefail + . /vault/scripts/comms_vault_env.sh pip install --no-cache-dir requests pyyaml >/dev/null python - <<'PY' import os, requests, urllib.parse @@ -140,7 +172,14 @@ spec: - name: synapse-config mountPath: /config readOnly: true + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true volumes: - name: synapse-config secret: secretName: othrys-synapse-matrix-synapse + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/serviceaccount.yaml b/services/comms/serviceaccount.yaml new file mode 100644 index 0000000..ee5eabb --- /dev/null +++ b/services/comms/serviceaccount.yaml @@ -0,0 +1,8 @@ +# services/comms/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: comms-vault + namespace: comms +imagePullSecrets: + - name: harbor-regcred diff --git a/services/comms/synapse-deployment-strategy-patch.yaml b/services/comms/synapse-deployment-strategy-patch.yaml deleted file mode 100644 index 59b8e32..0000000 --- a/services/comms/synapse-deployment-strategy-patch.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# services/comms/synapse-deployment-strategy-patch.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: othrys-synapse-matrix-synapse -spec: - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 0 - maxUnavailable: 1 diff --git a/services/comms/synapse-rendered.yaml b/services/comms/synapse-rendered.yaml deleted file mode 100644 index 83fce79..0000000 --- a/services/comms/synapse-rendered.yaml +++ /dev/null @@ -1,895 +0,0 @@ ---- -# Source: matrix-synapse/charts/redis/templates/serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -automountServiceAccountToken: true -metadata: - name: othrys-synapse-redis - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 ---- -# Source: matrix-synapse/templates/secrets.yaml 
-apiVersion: v1 -kind: Secret -metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm -stringData: - config.yaml: | - ## Registration ## - - ## API Configuration ## - - ## Database configuration ## - - database: - name: "psycopg2" - args: - user: "synapse" - password: "@@POSTGRES_PASSWORD@@" - database: "synapse" - host: "postgres-service.postgres.svc.cluster.local" - port: 5432 - sslmode: "prefer" - cp_min: 5 - cp_max: 10 - - - ## Redis configuration ## - - redis: - enabled: true - host: "othrys-synapse-redis-master" - port: 6379 - password: "@@REDIS_PASSWORD@@" ---- -# Source: matrix-synapse/charts/redis/templates/configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-synapse-redis-configuration - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 -data: - redis.conf: |- - # User-supplied common configuration: - # Enable AOF https://redis.io/topics/persistence#append-only-file - appendonly yes - # Disable RDB persistence, AOF persistence already enabled. 
- save "" - # End of common configuration - master.conf: |- - dir /data - # User-supplied master configuration: - rename-command FLUSHDB "" - rename-command FLUSHALL "" - # End of master configuration - replica.conf: |- - dir /data - # User-supplied replica configuration: - rename-command FLUSHDB "" - rename-command FLUSHALL "" - # End of replica configuration ---- -# Source: matrix-synapse/templates/configuration.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm -data: - log.yaml: | - version: 1 - formatters: - precise: - format: '%(asctime)s - %(name)s - %(lineno)d - %(levelname)s - %(request)s- %(message)s' - filters: - context: - (): synapse.util.logcontext.LoggingContextFilter - request: "" - handlers: - console: - class: logging.StreamHandler - formatter: precise - filters: [context] - level: INFO - loggers: - synapse: - level: INFO - root: - level: INFO - handlers: [console] - homeserver.yaml: | - # NOTE: - # Secrets are stored in separate configs to better fit K8s concepts - - ## Server ## - - server_name: "live.bstein.dev" - public_baseurl: "https://matrix.live.bstein.dev" - pid_file: /homeserver.pid - web_client: False - soft_file_limit: 0 - log_config: "/synapse/config/log.yaml" - report_stats: false - - instance_map: - main: - host: othrys-synapse-replication - port: 9093 - - ## Ports ## - - listeners: - - port: 8008 - tls: false - bind_addresses: ["0.0.0.0"] - type: http - x_forwarded: true - - resources: - - names: - - client - - federation - compress: false - - - port: 9090 - tls: false - bind_addresses: ["::"] - type: http - - resources: - - names: [metrics] - compress: false - - - port: 9093 - tls: false - bind_addresses: ["::"] - type: http - - resources: - - names: [replication] - compress: false - - 
## Files ## - - media_store_path: "/synapse/data/media" - uploads_path: "/synapse/data/uploads" - - ## Registration ## - - enable_registration: false - - ## Metrics ### - - enable_metrics: true - - ## Signing Keys ## - - signing_key_path: "/synapse/keys/signing.key" - macaroon_secret_key: "@@MACAROON_SECRET_KEY@@" - - # The trusted servers to download signing keys from. - trusted_key_servers: - - server_name: matrix.org - - ## Workers ## - - ## Extra config ## - - allow_guest_access: true - allow_public_rooms_without_auth: true - auto_join_rooms: - - "#othrys:live.bstein.dev" - autocreate_auto_join_rooms: true - default_room_version: "11" - experimental_features: - msc3266_enabled: true - msc4108_enabled: true - msc4143_enabled: true - msc4222_enabled: true - max_event_delay_duration: 24h - password_config: - enabled: false - turn_uris: - - "turn:turn.live.bstein.dev:3478?transport=udp" - - "turn:turn.live.bstein.dev:3478?transport=tcp" - - "turns:turn.live.bstein.dev:5349?transport=tcp" - turn_shared_secret: "@@TURN_SECRET@@" - turn_allow_guests: true - turn_user_lifetime: 86400000 - rc_login: - address: - burst_count: 20 - per_second: 5 - account: - burst_count: 20 - per_second: 5 - failed_attempts: - burst_count: 20 - per_second: 5 - rc_message: - per_second: 0.5 - burst_count: 30 - rc_delayed_event_mgmt: - per_second: 1 - burst_count: 20 - room_list_publication_rules: - - action: allow - well_known_client: - "m.homeserver": - "base_url": "https://matrix.live.bstein.dev" - "org.matrix.msc2965.authentication": - "issuer": "https://matrix.live.bstein.dev/" - "account": "https://matrix.live.bstein.dev/account/" - "org.matrix.msc4143.rtc_foci": - - type: "livekit" - livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" - - matrix_authentication_service: - enabled: true - endpoint: http://matrix-authentication-service:8080/ - secret: "@@MAS_SHARED_SECRET@@" ---- -# Source: matrix-synapse/templates/pvc.yaml -kind: PersistentVolumeClaim -apiVersion: v1 
-metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm -spec: - accessModes: - - "ReadWriteOnce" - resources: - requests: - storage: "50Gi" - storageClassName: "asteria" ---- -# Source: matrix-synapse/charts/redis/templates/headless-svc.yaml -apiVersion: v1 -kind: Service -metadata: - name: othrys-synapse-redis-headless - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - annotations: - -spec: - type: ClusterIP - clusterIP: None - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - selector: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis ---- -# Source: matrix-synapse/charts/redis/templates/master/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: othrys-synapse-redis-master - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - app.kubernetes.io/component: master -spec: - type: ClusterIP - internalTrafficPolicy: Cluster - sessionAffinity: None - ports: - - name: tcp-redis - port: 6379 - targetPort: redis - nodePort: null - selector: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - app.kubernetes.io/component: master ---- -# Source: matrix-synapse/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 8008 - targetPort: http - protocol: TCP - name: http - selector: - 
app.kubernetes.io/component: synapse - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse ---- -# Source: matrix-synapse/templates/service.yaml -apiVersion: v1 -kind: Service -metadata: - name: othrys-synapse-replication - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm -spec: - type: ClusterIP - ports: - - port: 9093 - targetPort: replication - protocol: TCP - name: replication - selector: - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/component: synapse ---- -# Source: matrix-synapse/charts/redis/templates/master/application.yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: othrys-synapse-redis-master - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - app.kubernetes.io/component: master -spec: - replicas: 1 - selector: - matchLabels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - app.kubernetes.io/component: master - strategy: - type: RollingUpdate - template: - metadata: - labels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/name: redis - helm.sh/chart: redis-17.17.1 - app.kubernetes.io/component: master - annotations: - checksum/configmap: 86bcc953bb473748a3d3dc60b7c11f34e60c93519234d4c37f42e22ada559d47 - checksum/health: aff24913d801436ea469d8d374b2ddb3ec4c43ee7ab24663d5f8ff1a1b6991a9 - checksum/scripts: 560c33ff34d845009b51830c332aa05fa211444d1877d3526d3599be7543aaa5 - checksum/secret: 44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a - spec: - - securityContext: - fsGroup: 1001 - serviceAccountName: othrys-synapse-redis - automountServiceAccountToken: true - affinity: - podAffinity: - - 
podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - podAffinityTerm: - labelSelector: - matchLabels: - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/name: redis - app.kubernetes.io/component: master - topologyKey: kubernetes.io/hostname - weight: 1 - nodeAffinity: - - enableServiceLinks: true - terminationGracePeriodSeconds: 30 - containers: - - name: redis - image: docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34 - imagePullPolicy: "IfNotPresent" - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - runAsGroup: 0 - runAsNonRoot: true - runAsUser: 1001 - seccompProfile: - type: RuntimeDefault - command: - - /bin/bash - args: - - -c - - /opt/bitnami/scripts/start-scripts/start-master.sh - env: - - name: BITNAMI_DEBUG - value: "false" - - name: REDIS_REPLICATION_MODE - value: master - - name: ALLOW_EMPTY_PASSWORD - value: "no" - - name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: synapse-redis - key: redis-password - - name: REDIS_TLS_ENABLED - value: "no" - - name: REDIS_PORT - value: "6379" - ports: - - name: redis - containerPort: 6379 - livenessProbe: - initialDelaySeconds: 20 - periodSeconds: 5 - # One second longer than command timeout should prevent generation of zombie processes. 
- timeoutSeconds: 6 - successThreshold: 1 - failureThreshold: 5 - exec: - command: - - sh - - -c - - /health/ping_liveness_local.sh 5 - readinessProbe: - initialDelaySeconds: 20 - periodSeconds: 5 - timeoutSeconds: 2 - successThreshold: 1 - failureThreshold: 5 - exec: - command: - - sh - - -c - - /health/ping_readiness_local.sh 1 - resources: - limits: {} - requests: {} - volumeMounts: - - name: start-scripts - mountPath: /opt/bitnami/scripts/start-scripts - - name: health - mountPath: /health - - name: redis-data - mountPath: /data - - name: config - mountPath: /opt/bitnami/redis/mounted-etc - - name: redis-tmp-conf - mountPath: /opt/bitnami/redis/etc/ - - name: tmp - mountPath: /tmp - volumes: - - name: start-scripts - configMap: - name: othrys-synapse-redis-scripts - defaultMode: 0755 - - name: health - configMap: - name: othrys-synapse-redis-health - defaultMode: 0755 - - name: config - configMap: - name: othrys-synapse-redis-configuration - - name: redis-tmp-conf - emptyDir: {} - - name: tmp - emptyDir: {} - - name: redis-data - emptyDir: {} ---- -# Source: matrix-synapse/templates/deployment.yaml -# Server: live.bstein.dev -apiVersion: apps/v1 -kind: Deployment -metadata: - name: othrys-synapse-matrix-synapse - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: synapse -spec: - replicas: 1 - strategy: - type: RollingUpdate - selector: - matchLabels: - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/component: synapse - template: - metadata: - annotations: - checksum/config: manual-rtc-enable-11 - checksum/secrets: ec9f3b254a562a0f0709461eb74a8cc91b8c1a2fb06be2594a131776c2541773 - labels: - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/component: synapse - spec: 
- serviceAccountName: default - - securityContext: - fsGroup: 666 - runAsGroup: 666 - runAsUser: 666 - containers: - - name: synapse - command: - - sh - - -c - - | - export POSTGRES_PASSWORD=$(echo "${POSTGRES_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') && \ - export REDIS_PASSWORD=$(echo "${REDIS_PASSWORD:-}" | sed 's/\//\\\//g' | sed 's/\&/\\\&/g') && \ - export OIDC_CLIENT_SECRET_ESCAPED=$(echo "${OIDC_CLIENT_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ - export TURN_SECRET_ESCAPED=$(echo "${TURN_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ - export MAS_SHARED_SECRET_ESCAPED=$(echo "${MAS_SHARED_SECRET:-}" | sed 's/[\\/&]/\\&/g') && \ - export MACAROON_SECRET_KEY_ESCAPED=$(echo "${MACAROON_SECRET_KEY:-}" | sed 's/[\\/&]/\\&/g') && \ - cat /synapse/secrets/*.yaml | \ - sed -e "s/@@POSTGRES_PASSWORD@@/${POSTGRES_PASSWORD:-}/" \ - -e "s/@@REDIS_PASSWORD@@/${REDIS_PASSWORD:-}/" \ - > /synapse/config/conf.d/secrets.yaml - - cp /synapse/config/homeserver.yaml /synapse/runtime-config/homeserver.yaml && \ - if [ -n "${OIDC_CLIENT_SECRET_ESCAPED}" ]; then \ - sed -i "s/@@OIDC_CLIENT_SECRET@@/${OIDC_CLIENT_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ - fi; \ - if [ -n "${TURN_SECRET_ESCAPED}" ]; then \ - sed -i "s/@@TURN_SECRET@@/${TURN_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ - fi; \ - if [ -n "${MAS_SHARED_SECRET_ESCAPED}" ]; then \ - sed -i "s/@@MAS_SHARED_SECRET@@/${MAS_SHARED_SECRET_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ - fi; \ - if [ -n "${MACAROON_SECRET_KEY_ESCAPED}" ]; then \ - sed -i "s/@@MACAROON_SECRET_KEY@@/${MACAROON_SECRET_KEY_ESCAPED}/g" /synapse/runtime-config/homeserver.yaml; \ - fi - exec python -B -m synapse.app.homeserver \ - -c /synapse/runtime-config/homeserver.yaml \ - -c /synapse/config/conf.d/ - env: - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD - - name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: synapse-redis - key: 
redis-password - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: synapse-oidc - key: client-secret - - name: TURN_SECRET - valueFrom: - secretKeyRef: - name: turn-shared-secret - key: TURN_STATIC_AUTH_SECRET - - name: MAS_SHARED_SECRET - valueFrom: - secretKeyRef: - name: mas-secrets-runtime - key: matrix_shared_secret - - name: MACAROON_SECRET_KEY - valueFrom: - secretKeyRef: - name: synapse-macaroon - key: macaroon_secret_key - image: "ghcr.io/element-hq/synapse:v1.144.0" - imagePullPolicy: IfNotPresent - securityContext: - {} - ports: - - name: http - containerPort: 8008 - protocol: TCP - - name: replication - containerPort: 9093 - protocol: TCP - - name: metrics - containerPort: 9090 - protocol: TCP - livenessProbe: - httpGet: - path: /health - port: http - readinessProbe: - httpGet: - path: /health - port: http - startupProbe: - failureThreshold: 12 - httpGet: - path: /health - port: http - volumeMounts: - - name: config - mountPath: /synapse/config - - name: runtime-config - mountPath: /synapse/runtime-config - - name: tmpconf - mountPath: /synapse/config/conf.d - - name: secrets - mountPath: /synapse/secrets - - name: signingkey - mountPath: /synapse/keys - - name: media - mountPath: /synapse/data - - name: tmpdir - mountPath: /tmp - resources: - limits: - cpu: "2" - memory: 3Gi - requests: - cpu: 500m - memory: 1Gi - volumes: - - name: config - configMap: - name: othrys-synapse-matrix-synapse - - name: secrets - secret: - secretName: othrys-synapse-matrix-synapse - - name: signingkey - secret: - secretName: "othrys-synapse-signingkey" - items: - - key: "signing.key" - path: signing.key - - name: tmpconf - emptyDir: {} - - name: tmpdir - emptyDir: {} - - name: runtime-config - emptyDir: {} - - name: media - persistentVolumeClaim: - claimName: othrys-synapse-matrix-synapse - nodeSelector: - hardware: rpi5 - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - preference: - matchExpressions: - - key: hardware - 
operator: In - values: - - rpi5 - - rpi4 - weight: 50 ---- -# Source: matrix-synapse/templates/signing-key-job.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: othrys-synapse-signingkey-job - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: hook-succeeded ---- -# Source: matrix-synapse/templates/signing-key-job.yaml -# Create secret if signing key job is enabled, or if we're running in ArgoCD and we don't have an existing secret -apiVersion: v1 -kind: Secret -metadata: - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: never - helm.sh/resource-policy: keep - # If for some reason we didn't detect ArgoCD, but are running in it, we want to make sure we don't delete the secret - argocd.argoproj.io/hook: Skip - name: othrys-synapse-signingkey - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job ---- -# Source: matrix-synapse/templates/signing-key-job.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: othrys-synapse-signingkey-job - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: hook-succeeded -rules: - - apiGroups: - - "" - resources: - - secrets - resourceNames: - - othrys-synapse-signingkey - verbs: - - get - - update - - patch ---- -# Source: 
matrix-synapse/templates/signing-key-job.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: othrys-synapse-signingkey-job - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: hook-succeeded -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: othrys-synapse-signingkey-job -subjects: - - kind: ServiceAccount - name: othrys-synapse-signingkey-job - namespace: comms ---- -# Source: matrix-synapse/templates/tests/test-connection.yaml -apiVersion: v1 -kind: Pod -metadata: - name: "othrys-synapse-matrix-synapse-test-connection" - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - annotations: - "helm.sh/hook": test-success -spec: - containers: - - name: wget - image: busybox - command: ['wget'] - args: ['othrys-synapse-matrix-synapse:8008/_matrix/client/versions'] - restartPolicy: Never ---- -# Source: matrix-synapse/templates/signing-key-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: othrys-synapse-signingkey-job - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: "1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job - annotations: - helm.sh/hook: pre-install - helm.sh/hook-delete-policy: hook-succeeded -spec: - ttlSecondsAfterFinished: 0 - template: - metadata: - labels: - helm.sh/chart: matrix-synapse-3.12.17 - app.kubernetes.io/name: matrix-synapse - app.kubernetes.io/instance: othrys-synapse - app.kubernetes.io/version: 
"1.144.0" - app.kubernetes.io/managed-by: Helm - app.kubernetes.io/component: signingkey-job - spec: - containers: - - command: - - sh - - -c - - | - echo "Generating signing key..." - if which generate_signing_key.py >/dev/null; then - generate_signing_key.py -o /synapse/keys/signing.key - else - generate_signing_key -o /synapse/keys/signing.key - fi - image: "matrixdotorg/synapse:latest" - imagePullPolicy: IfNotPresent - name: signing-key-generate - resources: - {} - securityContext: - {} - volumeMounts: - - mountPath: /synapse/keys - name: matrix-synapse-keys - - command: - - sh - - -c - - | - printf "Checking rights to update secret... " - kubectl auth can-i update secret/${SECRET_NAME} - /scripts/signing-key.sh - env: - - name: SECRET_NAME - value: othrys-synapse-signingkey - image: "bitnami/kubectl:latest" - imagePullPolicy: IfNotPresent - name: signing-key-upload - resources: - {} - securityContext: - {} - volumeMounts: - - mountPath: /scripts - name: scripts - readOnly: true - - mountPath: /synapse/keys - name: matrix-synapse-keys - readOnly: true - securityContext: - {} - restartPolicy: Never - serviceAccount: othrys-synapse-signingkey-job - volumes: - - name: scripts - configMap: - name: othrys-synapse-matrix-synapse-scripts - defaultMode: 0755 - - name: matrix-synapse-keys - emptyDir: {} - parallelism: 1 - completions: 1 - backoffLimit: 1 diff --git a/services/comms/synapse-seeder-admin-ensure-job.yaml b/services/comms/synapse-seeder-admin-ensure-job.yaml index 0885722..9905658 100644 --- a/services/comms/synapse-seeder-admin-ensure-job.yaml +++ b/services/comms/synapse-seeder-admin-ensure-job.yaml @@ -2,13 +2,66 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-seeder-admin-ensure-2 + name: synapse-seeder-admin-ensure-7 namespace: comms spec: backoffLimit: 2 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + 
vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + 
vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: OnFailure + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: comms-vault containers: - name: psql image: postgres:16-alpine @@ -21,16 +74,21 @@ spec: value: synapse - name: PGUSER value: synapse - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD command: - /bin/sh - -c - | set -euo pipefail + . 
/vault/scripts/comms_vault_env.sh psql -v ON_ERROR_STOP=1 <<'SQL' UPDATE users SET admin = 1 WHERE name = '@othrys-seeder:live.bstein.dev'; SQL + volumeMounts: + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true + volumes: + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/synapse-signingkey-ensure-job.yaml b/services/comms/synapse-signingkey-ensure-job.yaml index 5ebaeda..402a820 100644 --- a/services/comms/synapse-signingkey-ensure-job.yaml +++ b/services/comms/synapse-signingkey-ensure-job.yaml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: othrys-synapse-signingkey-ensure-5 + name: othrys-synapse-signingkey-ensure-7 namespace: comms spec: backoffLimit: 2 @@ -10,6 +10,20 @@ spec: spec: serviceAccountName: othrys-synapse-signingkey-job restartPolicy: OnFailure + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] volumes: - name: work emptyDir: {} @@ -20,23 +34,44 @@ spec: args: - | set -euo pipefail - generate_signing_key -o /work/signing.key + umask 077 + if which generate_signing_key.py >/dev/null; then + generate_signing_key.py -o /work/signing.key + else + generate_signing_key -o /work/signing.key + fi + chmod 0644 /work/signing.key volumeMounts: - name: work mountPath: /work containers: - - name: patch - image: bitnami/kubectl:latest + - name: store + image: registry.bstein.dev/bstein/kubectl:1.35.0 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - if kubectl -n comms get secret othrys-synapse-signingkey -o jsonpath='{.data.signing\.key}' 2>/dev/null | grep -q .; then + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + 
vault_role="${VAULT_ROLE:-comms-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 + fi + + existing="$(curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/comms/othrys-synapse-signingkey" | jq -r '.data.data["signing.key"] // empty')" + if [ -n "${existing}" ]; then exit 0 fi - kubectl -n comms create secret generic othrys-synapse-signingkey \ - --from-file=signing.key=/work/signing.key \ - --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null + + value="$(cat /work/signing.key)" + payload="$(jq -nc --arg value "${value}" '{data:{"signing.key":$value}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/comms/othrys-synapse-signingkey" >/dev/null volumeMounts: - name: work mountPath: /work diff --git a/services/comms/synapse-signingkey-ensure-rbac.yaml b/services/comms/synapse-signingkey-ensure-rbac.yaml new file mode 100644 index 0000000..29387f1 --- /dev/null +++ b/services/comms/synapse-signingkey-ensure-rbac.yaml @@ -0,0 +1,36 @@ +# services/comms/synapse-signingkey-ensure-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: othrys-synapse-signingkey-job + namespace: comms +imagePullSecrets: + - name: harbor-regcred +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: othrys-synapse-signingkey-job + namespace: comms +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create"] + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["othrys-synapse-signingkey"] + verbs: ["get", "patch", "update"] +--- +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: othrys-synapse-signingkey-job + namespace: comms +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: othrys-synapse-signingkey-job +subjects: + - kind: ServiceAccount + name: othrys-synapse-signingkey-job + namespace: comms diff --git a/services/comms/synapse-user-seed-job.yaml b/services/comms/synapse-user-seed-job.yaml index 083f72e..7fef796 100644 --- a/services/comms/synapse-user-seed-job.yaml +++ b/services/comms/synapse-user-seed-job.yaml @@ -2,14 +2,67 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-user-seed-2 + name: synapse-user-seed-7 namespace: comms spec: backoffLimit: 1 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "comms" + vault.hashicorp.com/agent-inject-secret-turn-secret: "kv/data/atlas/comms/turn-shared-secret" + vault.hashicorp.com/agent-inject-template-turn-secret: | + {{- with secret "kv/data/atlas/comms/turn-shared-secret" -}}{{ .Data.data.TURN_STATIC_AUTH_SECRET }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-livekit-primary: "kv/data/atlas/comms/livekit-api" + vault.hashicorp.com/agent-inject-template-livekit-primary: | + {{- with secret "kv/data/atlas/comms/livekit-api" -}}{{ .Data.data.primary }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-bot-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-bot-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "bot-password" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-seeder-pass: "kv/data/atlas/comms/atlasbot-credentials-runtime" + vault.hashicorp.com/agent-inject-template-seeder-pass: | + {{- with secret "kv/data/atlas/comms/atlasbot-credentials-runtime" -}}{{ index .Data.data "seeder-password" }}{{- end -}} + 
vault.hashicorp.com/agent-inject-secret-chat-matrix: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-matrix: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.matrix }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-chat-homepage: "kv/data/atlas/shared/chat-ai-keys-runtime" + vault.hashicorp.com/agent-inject-template-chat-homepage: | + {{- with secret "kv/data/atlas/shared/chat-ai-keys-runtime" -}}{{ .Data.data.homepage }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-admin-secret: "kv/data/atlas/comms/mas-admin-client-runtime" + vault.hashicorp.com/agent-inject-template-mas-admin-secret: | + {{- with secret "kv/data/atlas/comms/mas-admin-client-runtime" -}}{{ .Data.data.client_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-synapse-db-pass: "kv/data/atlas/comms/synapse-db" + vault.hashicorp.com/agent-inject-template-synapse-db-pass: | + {{- with secret "kv/data/atlas/comms/synapse-db" -}}{{ .Data.data.POSTGRES_PASSWORD }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-db-pass: "kv/data/atlas/comms/mas-db" + vault.hashicorp.com/agent-inject-template-mas-db-pass: | + {{- with secret "kv/data/atlas/comms/mas-db" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-matrix-shared: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-matrix-shared: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.matrix_shared_secret }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mas-kc-secret: "kv/data/atlas/comms/mas-secrets-runtime" + vault.hashicorp.com/agent-inject-template-mas-kc-secret: | + {{- with secret "kv/data/atlas/comms/mas-secrets-runtime" -}}{{ .Data.data.keycloak_client_secret }}{{- end -}} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: 
node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: comms-vault containers: - name: seed image: python:3.11-slim @@ -22,30 +75,16 @@ spec: value: synapse - name: PGUSER value: synapse - - name: PGPASSWORD - valueFrom: - secretKeyRef: - name: synapse-db - key: POSTGRES_PASSWORD - name: SEEDER_USER value: othrys-seeder - - name: SEEDER_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: seeder-password - name: BOT_USER value: atlasbot - - name: BOT_PASS - valueFrom: - secretKeyRef: - name: atlasbot-credentials-runtime - key: bot-password command: - /bin/sh - -c - | set -euo pipefail + . /vault/scripts/comms_vault_env.sh pip install --no-cache-dir psycopg2-binary bcrypt >/dev/null python - <<'PY' import os @@ -118,3 +157,12 @@ spec: finally: conn.close() PY + volumeMounts: + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true + volumes: + - name: vault-scripts + configMap: + name: comms-vault-env + defaultMode: 0555 diff --git a/services/comms/values-element.yaml b/services/comms/values-element.yaml deleted file mode 100644 index b8c7d87..0000000 --- a/services/comms/values-element.yaml +++ /dev/null @@ -1,59 +0,0 @@ -# services/comms/values-element.yaml -replicaCount: 1 - -defaultServer: - url: https://matrix.live.bstein.dev - name: live.bstein.dev - -config: - default_theme: dark - brand: Othrys - disable_custom_urls: true - disable_login_language_selector: true - disable_guests: false - show_labs_settings: true - features: - feature_group_calls: true - feature_video_rooms: true - feature_element_call_video_rooms: true - room_directory: - servers: - - live.bstein.dev - jitsi: {} - element_call: - url: https://call.live.bstein.dev - participant_limit: 16 - brand: Othrys Call - -ingress: - enabled: true - className: traefik - annotations: - 
cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.entrypoints: websecure - hosts: - - live.bstein.dev - tls: - - secretName: live-othrys-tls - hosts: [live.bstein.dev] - -resources: - requests: - cpu: 100m - memory: 256Mi - limits: - cpu: 500m - memory: 512Mi - -nodeSelector: - hardware: rpi5 - -affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 50 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5","rpi4"] diff --git a/services/comms/values-synapse.yaml b/services/comms/values-synapse.yaml deleted file mode 100644 index 650d0e8..0000000 --- a/services/comms/values-synapse.yaml +++ /dev/null @@ -1,132 +0,0 @@ -# services/comms/values-synapse.yaml -serverName: live.bstein.dev -publicServerName: matrix.live.bstein.dev - -config: - publicBaseurl: https://matrix.live.bstein.dev - -externalPostgresql: - host: postgres-service.postgres.svc.cluster.local - port: 5432 - username: synapse - existingSecret: synapse-db - existingSecretPasswordKey: POSTGRES_PASSWORD - database: synapse - -redis: - enabled: true - auth: - enabled: true - existingSecret: synapse-redis - existingSecretPasswordKey: redis-password - -postgresql: - enabled: false - -persistence: - enabled: true - storageClass: asteria - accessMode: ReadWriteOnce - size: 50Gi - -synapse: - podSecurityContext: - fsGroup: 666 - runAsUser: 666 - runAsGroup: 666 - resources: - requests: - cpu: 500m - memory: 1Gi - limits: - cpu: "2" - memory: 3Gi - nodeSelector: - hardware: rpi5 - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 50 - preference: - matchExpressions: - - key: hardware - operator: In - values: ["rpi5","rpi4"] - -ingress: - enabled: true - className: traefik - annotations: - cert-manager.io/cluster-issuer: letsencrypt - traefik.ingress.kubernetes.io/router.entrypoints: websecure - csHosts: - - matrix.live.bstein.dev - hosts: - - matrix.live.bstein.dev - wkHosts: - - 
live.bstein.dev - - bstein.dev - tls: - - secretName: matrix-live-tls - hosts: - - matrix.live.bstein.dev - - live.bstein.dev - -extraConfig: - allow_guest_access: true - allow_public_rooms_without_auth: true - auto_join_rooms: - - "#othrys:live.bstein.dev" - autocreate_auto_join_rooms: true - default_room_version: "11" - experimental_features: - msc3266_enabled: true - msc4143_enabled: true - msc4222_enabled: true - max_event_delay_duration: 24h - password_config: - enabled: true - oidc_enabled: true - oidc_providers: - - idp_id: keycloak - idp_name: Keycloak - issuer: https://sso.bstein.dev/realms/atlas - client_id: synapse - client_secret: "@@OIDC_CLIENT_SECRET@@" - client_auth_method: client_secret_post - scopes: ["openid", "profile", "email"] - authorization_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth - token_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token - userinfo_endpoint: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo - user_mapping_provider: - config: - localpart_template: "{{ user.preferred_username }}" - display_name_template: "{{ user.name }}" - allow_existing_users: true - rc_message: - per_second: 0.5 - burst_count: 30 - rc_delayed_event_mgmt: - per_second: 1 - burst_count: 20 - rc_login: - address: - burst_count: 20 - per_second: 5 - account: - burst_count: 20 - per_second: 5 - failed_attempts: - burst_count: 20 - per_second: 5 - room_list_publication_rules: - - action: allow - well_known_client: - "m.homeserver": - "base_url": "https://matrix.live.bstein.dev" - "org.matrix.msc4143.rtc_foci": - - type: "livekit" - livekit_service_url: "https://kit.live.bstein.dev/livekit/jwt" - -worker: - enabled: false diff --git a/services/comms/vault-sync-deployment.yaml b/services/comms/vault-sync-deployment.yaml new file mode 100644 index 0000000..f5b5849 --- /dev/null +++ b/services/comms/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/comms/vault-sync-deployment.yaml 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: comms-vault-sync + namespace: comms +spec: + replicas: 1 + selector: + matchLabels: + app: comms-vault-sync + template: + metadata: + labels: + app: comms-vault-sync + spec: + serviceAccountName: comms-vault + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: comms-vault diff --git a/services/crypto/default-serviceaccount.yaml b/services/crypto/default-serviceaccount.yaml new file mode 100644 index 0000000..fca7007 --- /dev/null +++ b/services/crypto/default-serviceaccount.yaml @@ -0,0 +1,8 @@ +# services/crypto/default-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: default + namespace: crypto +imagePullSecrets: + - name: harbor-regcred diff --git a/services/crypto/kustomization.yaml b/services/crypto/kustomization.yaml index 4e6ee87..f31fc8a 100644 --- a/services/crypto/kustomization.yaml +++ b/services/crypto/kustomization.yaml @@ -3,3 +3,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml + - default-serviceaccount.yaml diff --git a/services/crypto/monerod/deployment.yaml b/services/crypto/monerod/deployment.yaml index 1c20ff8..9d64864 100644 --- a/services/crypto/monerod/deployment.yaml +++ b/services/crypto/monerod/deployment.yaml @@ -1,4 +1,4 @@ -# services/crypto/monerod +# services/crypto/monerod/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: @@ -18,6 +18,8 @@ spec: fsGroupChangePolicy: OnRootMismatch nodeSelector: node-role.kubernetes.io/worker: "true" + imagePullSecrets: + - name: harbor-regcred affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: diff --git a/services/crypto/wallet-monero-temp/deployment.yaml 
b/services/crypto/wallet-monero-temp/deployment.yaml new file mode 100644 index 0000000..4b73f64 --- /dev/null +++ b/services/crypto/wallet-monero-temp/deployment.yaml @@ -0,0 +1,82 @@ +# services/crypto/wallet-monero-temp/deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: wallet-monero-temp + namespace: crypto + labels: + app: wallet-monero-temp +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: wallet-monero-temp + template: + metadata: + labels: + app: wallet-monero-temp + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "crypto" + vault.hashicorp.com/agent-inject-secret-wallet-rpc-env.sh: "kv/data/atlas/crypto/wallet-monero-temp-rpc-auth" + vault.hashicorp.com/agent-inject-template-wallet-rpc-env.sh: | + {{- with secret "kv/data/atlas/crypto/wallet-monero-temp-rpc-auth" -}} + export RPC_USER="{{ .Data.data.username }}" + export RPC_PASS="{{ .Data.data.password }}" + {{- end -}} + spec: + serviceAccountName: crypto-vault-sync + automountServiceAccountToken: true + nodeSelector: + node-role.kubernetes.io/worker: "true" + imagePullSecrets: + - name: harbor-regcred + securityContext: + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + initContainers: + - name: volume-permissions + image: busybox:1.36 + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-lc"] + args: + - chown :1000 /data && chmod 0770 /data + securityContext: + runAsUser: 0 + volumeMounts: + - name: data + mountPath: /data + containers: + - name: wallet-rpc + image: registry.bstein.dev/crypto/monero-wallet-rpc:0.18.4.1 + imagePullPolicy: Always + command: ["/bin/sh", "-lc"] + args: + - | + set -eu + . 
/vault/secrets/wallet-rpc-env.sh + exec /usr/local/bin/monero-wallet-rpc \ + --wallet-dir /data \ + --daemon-address xmr-node.cakewallet.com:18081 \ + --rpc-bind-ip 0.0.0.0 --rpc-bind-port 18083 \ + --rpc-login "${RPC_USER}:${RPC_PASS}" \ + --confirm-external-bind + ports: + - containerPort: 18083 + name: rpc + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: "1" + memory: 512Mi + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: wallet-monero-temp diff --git a/services/crypto/wallet-monero-temp/kustomization.yaml b/services/crypto/wallet-monero-temp/kustomization.yaml new file mode 100644 index 0000000..6236858 --- /dev/null +++ b/services/crypto/wallet-monero-temp/kustomization.yaml @@ -0,0 +1,7 @@ +# services/crypto/wallet-monero-temp/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - pvc.yaml + - deployment.yaml + - service.yaml diff --git a/services/crypto/wallet-monero-temp/pvc.yaml b/services/crypto/wallet-monero-temp/pvc.yaml new file mode 100644 index 0000000..cf0c757 --- /dev/null +++ b/services/crypto/wallet-monero-temp/pvc.yaml @@ -0,0 +1,13 @@ +# services/crypto/wallet-monero-temp/pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: wallet-monero-temp + namespace: crypto +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: astreae diff --git a/services/crypto/wallet-monero-temp/service.yaml b/services/crypto/wallet-monero-temp/service.yaml new file mode 100644 index 0000000..4bf3566 --- /dev/null +++ b/services/crypto/wallet-monero-temp/service.yaml @@ -0,0 +1,16 @@ +# services/crypto/wallet-monero-temp/service.yaml +apiVersion: v1 +kind: Service +metadata: + name: wallet-monero-temp + namespace: crypto + labels: + app: wallet-monero-temp +spec: + type: ClusterIP + selector: + app: wallet-monero-temp + ports: + - name: rpc + port: 18083 + targetPort: 18083 
diff --git a/services/crypto/xmr-miner/deployment.yaml b/services/crypto/xmr-miner/deployment.yaml index efc00ca..820c2ce 100644 --- a/services/crypto/xmr-miner/deployment.yaml +++ b/services/crypto/xmr-miner/deployment.yaml @@ -12,9 +12,18 @@ spec: template: metadata: labels: { app: monero-p2pool } + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "crypto" + vault.hashicorp.com/agent-inject-secret-xmr-env: "kv/data/atlas/crypto/xmr-payout" + vault.hashicorp.com/agent-inject-template-xmr-env: | + {{- with secret "kv/data/atlas/crypto/xmr-payout" -}} + export XMR_ADDR="{{ .Data.data.address }}" + {{- end -}} spec: nodeSelector: node-role.kubernetes.io/worker: "true" + serviceAccountName: crypto-vault-sync affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: @@ -61,27 +70,17 @@ spec: - name: monero-p2pool image: debian:bookworm-slim imagePullPolicy: IfNotPresent - command: ["/opt/p2pool/p2pool"] + command: ["/bin/sh", "-c"] args: - - "--host" - - "monerod.crypto.svc.cluster.local" - - "--rpc-port" - - "18081" - - "--zmq-port" - - "18083" - - "--stratum" - - "0.0.0.0:3333" - - "--wallet" - - "$(XMR_ADDR)" - # - "--light-mode" - # - "--no-randomx" - # - "--no-cache" - env: - - name: XMR_ADDR - valueFrom: - secretKeyRef: - name: xmr-payout - key: address + - | + set -eu + . 
/vault/secrets/xmr-env + exec /opt/p2pool/p2pool \ + --host monerod.crypto.svc.cluster.local \ + --rpc-port 18081 \ + --zmq-port 18083 \ + --stratum 0.0.0.0:3333 \ + --wallet "${XMR_ADDR}" ports: - { name: stratum, containerPort: 3333, protocol: TCP } readinessProbe: diff --git a/services/crypto/xmr-miner/kustomization.yaml b/services/crypto/xmr-miner/kustomization.yaml index 263b25a..2ded8db 100644 --- a/services/crypto/xmr-miner/kustomization.yaml +++ b/services/crypto/xmr-miner/kustomization.yaml @@ -1,8 +1,11 @@ -# services/crypto/xmr-miner/kustomization/yaml +# services/crypto/xmr-miner/kustomization.yaml apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - configmap-sources.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml - deployment.yaml + - vault-sync-deployment.yaml - service.yaml - xmrig-daemonset.yaml diff --git a/services/crypto/xmr-miner/secretproviderclass.yaml b/services/crypto/xmr-miner/secretproviderclass.yaml new file mode 100644 index 0000000..a72097f --- /dev/null +++ b/services/crypto/xmr-miner/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/crypto/xmr-miner/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: crypto-vault + namespace: crypto +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "crypto" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/crypto" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/crypto/xmr-miner/vault-serviceaccount.yaml b/services/crypto/xmr-miner/vault-serviceaccount.yaml new file mode 100644 index 0000000..96a12c7 --- /dev/null +++ b/services/crypto/xmr-miner/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# 
services/crypto/xmr-miner/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: crypto-vault-sync + namespace: crypto diff --git a/services/crypto/xmr-miner/vault-sync-deployment.yaml b/services/crypto/xmr-miner/vault-sync-deployment.yaml new file mode 100644 index 0000000..fcd08c3 --- /dev/null +++ b/services/crypto/xmr-miner/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/crypto/xmr-miner/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: crypto-vault-sync + namespace: crypto +spec: + replicas: 1 + selector: + matchLabels: + app: crypto-vault-sync + template: + metadata: + labels: + app: crypto-vault-sync + spec: + serviceAccountName: crypto-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: crypto-vault diff --git a/services/crypto/xmr-miner/xmrig-daemonset.yaml b/services/crypto/xmr-miner/xmrig-daemonset.yaml index 74836d3..a1ee2ae 100644 --- a/services/crypto/xmr-miner/xmrig-daemonset.yaml +++ b/services/crypto/xmr-miner/xmrig-daemonset.yaml @@ -1,3 +1,4 @@ +# services/crypto/xmr-miner/xmrig-daemonset.yaml apiVersion: apps/v1 kind: DaemonSet metadata: @@ -23,13 +24,9 @@ spec: - key: hardware operator: In values: ["rpi4","rpi5"] - volumes: - - name: payout - secret: - secretName: monero-payout containers: - name: xmrig - image: ghcr.io/tari-project/xmrig:latest + image: ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9 imagePullPolicy: IfNotPresent env: - name: XMRIG_THREADS @@ -50,5 +47,3 @@ spec: --donate-level N \ --cpu-priority 1 \ --threads "${THR}" ${EXTRA} - volumeMounts: - - { name: payout, mountPath: /run/xmr, readOnly: true } diff --git 
a/services/finance/actual-budget-data-pvc.yaml b/services/finance/actual-budget-data-pvc.yaml new file mode 100644 index 0000000..2da64a8 --- /dev/null +++ b/services/finance/actual-budget-data-pvc.yaml @@ -0,0 +1,12 @@ +# services/finance/actual-budget-data-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: actual-budget-data-encrypted + namespace: finance +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria-encrypted + resources: + requests: + storage: 10Gi diff --git a/services/finance/actual-budget-deployment.yaml b/services/finance/actual-budget-deployment.yaml new file mode 100644 index 0000000..55186b2 --- /dev/null +++ b/services/finance/actual-budget-deployment.yaml @@ -0,0 +1,176 @@ +# services/finance/actual-budget-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: actual-budget + namespace: finance + labels: + app: actual-budget +spec: + replicas: 1 + selector: + matchLabels: + app: actual-budget + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + app: actual-budget + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "finance" + vault.hashicorp.com/agent-inject-secret-actual-env.sh: "kv/data/atlas/finance/actual-oidc" + vault.hashicorp.com/agent-inject-template-actual-env.sh: | + {{ with secret "kv/data/atlas/finance/actual-oidc" }} + export ACTUAL_OPENID_CLIENT_ID="{{ .Data.data.ACTUAL_OPENID_CLIENT_ID }}" + export ACTUAL_OPENID_CLIENT_SECRET="{{ .Data.data.ACTUAL_OPENID_CLIENT_SECRET }}" + {{ end }} + actual.bstein.dev/bootstrap-rev: "2" + spec: + serviceAccountName: finance-vault + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + 
- key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + initContainers: + - name: init-data-permissions + image: docker.io/alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -e + mkdir -p /data /data/server-files /data/user-files + chown -R 1000:1000 /data + securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: actual-data + mountPath: /data + - name: init-openid + image: actualbudget/actual-server:26.1.0-alpine@sha256:34aae5813fdfee12af2a50c4d0667df68029f1d61b90f45f282473273eb70d0d + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . /vault/secrets/actual-env.sh + node /app/src/scripts/run-migrations.js + node /scripts/actual_openid_bootstrap.mjs + env: + - name: ACTUAL_DATA_DIR + value: /data + - name: ACTUAL_LOGIN_METHOD + value: openid + - name: ACTUAL_ALLOWED_LOGIN_METHODS + value: openid + - name: ACTUAL_MULTIUSER + value: "true" + - name: ACTUAL_OPENID_DISCOVERY_URL + value: https://sso.bstein.dev/realms/atlas + - name: ACTUAL_OPENID_AUTHORIZATION_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth + - name: ACTUAL_OPENID_TOKEN_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token + - name: ACTUAL_OPENID_USERINFO_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo + - name: ACTUAL_OPENID_PROVIDER_NAME + value: Atlas SSO + - name: ACTUAL_OPENID_SERVER_HOSTNAME + value: https://budget.bstein.dev + volumeMounts: + - name: actual-data + mountPath: /data + - name: actual-openid-bootstrap-script + mountPath: /scripts + readOnly: true + containers: + - name: actual-budget + image: actualbudget/actual-server:26.1.0-alpine@sha256:34aae5813fdfee12af2a50c4d0667df68029f1d61b90f45f282473273eb70d0d + command: ["/bin/sh", "-c"] + args: + - | 
+ . /vault/secrets/actual-env.sh + exec node app + ports: + - name: http + containerPort: 5006 + env: + - name: ACTUAL_DATA_DIR + value: /data + - name: ACTUAL_LOGIN_METHOD + value: openid + - name: ACTUAL_ALLOWED_LOGIN_METHODS + value: openid + - name: ACTUAL_MULTIUSER + value: "true" + - name: ACTUAL_OPENID_DISCOVERY_URL + value: https://sso.bstein.dev/realms/atlas + - name: ACTUAL_OPENID_AUTHORIZATION_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/auth + - name: ACTUAL_OPENID_TOKEN_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/token + - name: ACTUAL_OPENID_USERINFO_ENDPOINT + value: https://sso.bstein.dev/realms/atlas/protocol/openid-connect/userinfo + - name: ACTUAL_OPENID_PROVIDER_NAME + value: Atlas SSO + - name: ACTUAL_OPENID_SERVER_HOSTNAME + value: https://budget.bstein.dev + volumeMounts: + - name: actual-data + mountPath: /data + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 3 + failureThreshold: 6 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 1Gi + volumes: + - name: actual-data + persistentVolumeClaim: + claimName: actual-budget-data-encrypted + - name: actual-openid-bootstrap-script + configMap: + name: actual-openid-bootstrap-script + defaultMode: 0555 diff --git a/services/finance/actual-budget-ingress.yaml b/services/finance/actual-budget-ingress.yaml new file mode 100644 index 0000000..c6eaee7 --- /dev/null +++ b/services/finance/actual-budget-ingress.yaml @@ -0,0 +1,26 @@ +# services/finance/actual-budget-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: actual-budget + namespace: finance + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: 
web,websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: ["budget.bstein.dev"] + secretName: actual-budget-tls + rules: + - host: budget.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: actual-budget + port: + number: 80 diff --git a/services/finance/actual-budget-service.yaml b/services/finance/actual-budget-service.yaml new file mode 100644 index 0000000..05213c4 --- /dev/null +++ b/services/finance/actual-budget-service.yaml @@ -0,0 +1,15 @@ +# services/finance/actual-budget-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: actual-budget + namespace: finance + labels: + app: actual-budget +spec: + selector: + app: actual-budget + ports: + - name: http + port: 80 + targetPort: 5006 diff --git a/services/finance/finance-secrets-ensure-job.yaml b/services/finance/finance-secrets-ensure-job.yaml new file mode 100644 index 0000000..67f06cb --- /dev/null +++ b/services/finance/finance-secrets-ensure-job.yaml @@ -0,0 +1,63 @@ +# services/finance/finance-secrets-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: finance-secrets-ensure-5 + namespace: finance +spec: + backoffLimit: 1 + ttlSecondsAfterFinished: 3600 + template: + spec: + serviceAccountName: finance-secrets-ensure + restartPolicy: Never + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: ensure + image: python:3.11-alpine + command: ["/bin/sh", "-c"] + args: + - | + set -e + exec python /scripts/finance_secrets_ensure.py + env: + - name: VAULT_ROLE + value: finance-secrets + volumeMounts: + - name: finance-secrets-ensure-script 
+ mountPath: /scripts + readOnly: true + - name: firefly-db + mountPath: /secrets/firefly-db + readOnly: true + - name: actualbudget-db + mountPath: /secrets/actualbudget-db + readOnly: true + volumes: + - name: finance-secrets-ensure-script + configMap: + name: finance-secrets-ensure-script + defaultMode: 0555 + - name: firefly-db + secret: + secretName: firefly-db + - name: actualbudget-db + secret: + secretName: actualbudget-db diff --git a/services/finance/finance-secrets-ensure-rbac.yaml b/services/finance/finance-secrets-ensure-rbac.yaml new file mode 100644 index 0000000..5f70578 --- /dev/null +++ b/services/finance/finance-secrets-ensure-rbac.yaml @@ -0,0 +1,24 @@ +# services/finance/finance-secrets-ensure-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: finance-secrets-ensure + namespace: finance +rules: + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: finance-secrets-ensure + namespace: finance +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: finance-secrets-ensure +subjects: + - kind: ServiceAccount + name: finance-secrets-ensure + namespace: finance diff --git a/services/finance/firefly-cronjob.yaml b/services/finance/firefly-cronjob.yaml new file mode 100644 index 0000000..6c4d507 --- /dev/null +++ b/services/finance/firefly-cronjob.yaml @@ -0,0 +1,55 @@ +# services/finance/firefly-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: firefly-cron + namespace: finance +spec: + schedule: "0 3 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "finance" + vault.hashicorp.com/agent-inject-secret-firefly-cron-token: 
"kv/data/atlas/finance/firefly-secrets" + vault.hashicorp.com/agent-inject-template-firefly-cron-token: | + {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} + {{ .Data.data.STATIC_CRON_TOKEN }} + {{- end -}} + spec: + serviceAccountName: finance-vault + restartPolicy: Never + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: cron + image: curlimages/curl:8.5.0 + command: ["/bin/sh", "-c"] + args: + - | + set -eu + token="$(cat /vault/secrets/firefly-cron-token)" + curl -fsS "http://firefly.finance.svc.cluster.local/api/v1/cron/${token}" diff --git a/services/finance/firefly-deployment.yaml b/services/finance/firefly-deployment.yaml new file mode 100644 index 0000000..9c684fe --- /dev/null +++ b/services/finance/firefly-deployment.yaml @@ -0,0 +1,169 @@ +# services/finance/firefly-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: firefly + namespace: finance + labels: + app: firefly +spec: + replicas: 1 + selector: + matchLabels: + app: firefly + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + template: + metadata: + labels: + app: firefly + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "finance" + vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db" + vault.hashicorp.com/agent-inject-template-firefly-env.sh: | + {{ with secret "kv/data/atlas/finance/firefly-db" }} + export DB_CONNECTION="pgsql" + export DB_HOST="{{ .Data.data.DB_HOST }}" + export DB_PORT="{{ .Data.data.DB_PORT }}" + export DB_DATABASE="{{ .Data.data.DB_DATABASE }}" + export DB_USERNAME="{{ .Data.data.DB_USERNAME }}" + 
export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)" + {{ end }} + {{ with secret "kv/data/atlas/finance/firefly-secrets" }} + export APP_KEY="$(cat /vault/secrets/firefly-app-key)" + export STATIC_CRON_TOKEN="$(cat /vault/secrets/firefly-cron-token)" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export MAIL_USERNAME="{{ index .Data.data "apikey" }}" + export MAIL_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db" + vault.hashicorp.com/agent-inject-template-firefly-db-password: | + {{- with secret "kv/data/atlas/finance/firefly-db" -}} + {{ .Data.data.DB_PASSWORD }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets" + vault.hashicorp.com/agent-inject-template-firefly-app-key: | + {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} + {{ .Data.data.APP_KEY }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-firefly-cron-token: "kv/data/atlas/finance/firefly-secrets" + vault.hashicorp.com/agent-inject-template-firefly-cron-token: | + {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} + {{ .Data.data.STATIC_CRON_TOKEN }} + {{- end -}} + firefly.bstein.dev/restart-rev: "2" + spec: + serviceAccountName: finance-vault + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + securityContext: + fsGroup: 33 + fsGroupChangePolicy: OnRootMismatch + initContainers: + - name: init-storage-permissions + image: docker.io/alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -e + mkdir -p /var/www/html/storage + chown -R 33:33 /var/www/html/storage 
+ securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: firefly-storage + mountPath: /var/www/html/storage + containers: + - name: firefly + image: fireflyiii/core:version-6.4.15 + command: ["/bin/sh", "-c"] + args: + - | + . /vault/secrets/firefly-env.sh + exec /usr/local/bin/docker-php-serversideup-entrypoint /init + env: + - name: APP_ENV + value: production + - name: APP_DEBUG + value: "false" + - name: APP_URL + value: https://money.bstein.dev + - name: SITE_OWNER + value: brad@bstein.dev + - name: TZ + value: Etc/UTC + - name: TRUSTED_PROXIES + value: "**" + - name: AUTHENTICATION_GUARD + value: web + - name: MAIL_MAILER + value: smtp + - name: MAIL_HOST + value: mail.bstein.dev + - name: MAIL_PORT + value: "587" + - name: MAIL_ENCRYPTION + value: tls + - name: MAIL_FROM_ADDRESS + value: no-reply-firefly@bstein.dev + - name: MAIL_FROM_NAME + value: Firefly III + - name: CACHE_DRIVER + value: file + - name: SESSION_DRIVER + value: file + ports: + - name: http + containerPort: 8080 + volumeMounts: + - name: firefly-storage + mountPath: /var/www/html/storage + readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 20 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 6 + livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 5 + failureThreshold: 6 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 1Gi + volumes: + - name: firefly-storage + persistentVolumeClaim: + claimName: firefly-storage diff --git a/services/finance/firefly-ingress.yaml b/services/finance/firefly-ingress.yaml new file mode 100644 index 0000000..40324a9 --- /dev/null +++ b/services/finance/firefly-ingress.yaml @@ -0,0 +1,26 @@ +# services/finance/firefly-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: firefly + namespace: finance + annotations: + kubernetes.io/ingress.class: traefik + 
traefik.ingress.kubernetes.io/router.entrypoints: web,websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: ["money.bstein.dev"] + secretName: firefly-tls + rules: + - host: money.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: firefly + port: + number: 80 diff --git a/services/finance/firefly-service.yaml b/services/finance/firefly-service.yaml new file mode 100644 index 0000000..a66980b --- /dev/null +++ b/services/finance/firefly-service.yaml @@ -0,0 +1,15 @@ +# services/finance/firefly-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: firefly + namespace: finance + labels: + app: firefly +spec: + selector: + app: firefly + ports: + - name: http + port: 80 + targetPort: 8080 diff --git a/services/finance/firefly-storage-pvc.yaml b/services/finance/firefly-storage-pvc.yaml new file mode 100644 index 0000000..835f827 --- /dev/null +++ b/services/finance/firefly-storage-pvc.yaml @@ -0,0 +1,12 @@ +# services/finance/firefly-storage-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: firefly-storage + namespace: finance +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria + resources: + requests: + storage: 10Gi diff --git a/services/finance/firefly-user-sync-cronjob.yaml b/services/finance/firefly-user-sync-cronjob.yaml new file mode 100644 index 0000000..aeadfad --- /dev/null +++ b/services/finance/firefly-user-sync-cronjob.yaml @@ -0,0 +1,92 @@ +# services/finance/firefly-user-sync-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: firefly-user-sync + namespace: finance + labels: + atlas.bstein.dev/glue: "true" +spec: + schedule: "0 6 * * *" + suspend: true + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 0 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + 
vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "finance" + vault.hashicorp.com/agent-inject-secret-firefly-env.sh: "kv/data/atlas/finance/firefly-db" + vault.hashicorp.com/agent-inject-template-firefly-env.sh: | + {{ with secret "kv/data/atlas/finance/firefly-db" }} + export DB_CONNECTION="pgsql" + export DB_HOST="{{ .Data.data.DB_HOST }}" + export DB_PORT="{{ .Data.data.DB_PORT }}" + export DB_DATABASE="{{ .Data.data.DB_DATABASE }}" + export DB_USERNAME="{{ .Data.data.DB_USERNAME }}" + export DB_PASSWORD="$(cat /vault/secrets/firefly-db-password)" + {{ end }} + {{ with secret "kv/data/atlas/finance/firefly-secrets" }} + export APP_KEY="$(cat /vault/secrets/firefly-app-key)" + {{ end }} + vault.hashicorp.com/agent-inject-secret-firefly-db-password: "kv/data/atlas/finance/firefly-db" + vault.hashicorp.com/agent-inject-template-firefly-db-password: | + {{- with secret "kv/data/atlas/finance/firefly-db" -}} + {{ .Data.data.DB_PASSWORD }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-firefly-app-key: "kv/data/atlas/finance/firefly-secrets" + vault.hashicorp.com/agent-inject-template-firefly-app-key: | + {{- with secret "kv/data/atlas/finance/firefly-secrets" -}} + {{ .Data.data.APP_KEY }} + {{- end -}} + spec: + serviceAccountName: finance-vault + restartPolicy: Never + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: sync + image: fireflyiii/core:version-6.4.15 + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . 
/vault/secrets/firefly-env.sh + exec php /scripts/firefly_user_sync.php + env: + - name: APP_ENV + value: production + - name: APP_DEBUG + value: "false" + - name: TZ + value: Etc/UTC + volumeMounts: + - name: firefly-user-sync-script + mountPath: /scripts + readOnly: true + volumes: + - name: firefly-user-sync-script + configMap: + name: firefly-user-sync-script + defaultMode: 0555 diff --git a/services/finance/kustomization.yaml b/services/finance/kustomization.yaml new file mode 100644 index 0000000..e4c414f --- /dev/null +++ b/services/finance/kustomization.yaml @@ -0,0 +1,32 @@ +# services/finance/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: finance +resources: + - namespace.yaml + - serviceaccount.yaml + - portal-rbac.yaml + - finance-secrets-ensure-rbac.yaml + - actual-budget-data-pvc.yaml + - firefly-storage-pvc.yaml + - finance-secrets-ensure-job.yaml + - actual-budget-deployment.yaml + - firefly-deployment.yaml + - firefly-user-sync-cronjob.yaml + - firefly-cronjob.yaml + - actual-budget-service.yaml + - firefly-service.yaml + - actual-budget-ingress.yaml + - firefly-ingress.yaml +generatorOptions: + disableNameSuffixHash: true +configMapGenerator: + - name: actual-openid-bootstrap-script + files: + - actual_openid_bootstrap.mjs=scripts/actual_openid_bootstrap.mjs + - name: firefly-user-sync-script + files: + - firefly_user_sync.php=scripts/firefly_user_sync.php + - name: finance-secrets-ensure-script + files: + - finance_secrets_ensure.py=scripts/finance_secrets_ensure.py diff --git a/services/finance/namespace.yaml b/services/finance/namespace.yaml new file mode 100644 index 0000000..e262026 --- /dev/null +++ b/services/finance/namespace.yaml @@ -0,0 +1,5 @@ +# services/finance/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: finance diff --git a/services/finance/portal-rbac.yaml b/services/finance/portal-rbac.yaml new file mode 100644 index 0000000..2fb7ede --- /dev/null +++ 
b/services/finance/portal-rbac.yaml @@ -0,0 +1,31 @@ +# services/finance/portal-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-firefly-user-sync + namespace: finance +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["firefly-user-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-firefly-user-sync + namespace: finance +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-firefly-user-sync +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home diff --git a/services/finance/scripts/actual_openid_bootstrap.mjs b/services/finance/scripts/actual_openid_bootstrap.mjs new file mode 100644 index 0000000..3b66fc2 --- /dev/null +++ b/services/finance/scripts/actual_openid_bootstrap.mjs @@ -0,0 +1,100 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; + +function findRoot() { + const candidates = []; + if (process.env.ACTUAL_SERVER_ROOT) { + candidates.push(process.env.ACTUAL_SERVER_ROOT); + } + candidates.push('/app'); + candidates.push('/usr/src/app'); + candidates.push('/srv/app'); + candidates.push('/opt/actual-server'); + + for (const base of candidates) { + if (!base) { + continue; + } + const accountDb = path.join(base, 'src', 'account-db.js'); + if (fs.existsSync(accountDb)) { + return base; + } + } + return ''; +} + +const root = findRoot(); +if (!root) { + console.error('actual server root not found'); + process.exit(1); +} + +const accountDbUrl = pathToFileURL(path.join(root, 'src', 'account-db.js')).href; +const loadConfigUrl = pathToFileURL(path.join(root, 'src', 'load-config.js')).href; + +const accountDb = await import(accountDbUrl); +const 
{ default: finalConfig } = await import(loadConfigUrl); + +const openIdEnv = (() => { + if ( + !process.env.ACTUAL_OPENID_DISCOVERY_URL && + !process.env.ACTUAL_OPENID_AUTHORIZATION_ENDPOINT + ) { + return null; + } + + if (process.env.ACTUAL_OPENID_DISCOVERY_URL) { + return { + issuer: process.env.ACTUAL_OPENID_DISCOVERY_URL, + client_id: process.env.ACTUAL_OPENID_CLIENT_ID, + client_secret: process.env.ACTUAL_OPENID_CLIENT_SECRET, + server_hostname: process.env.ACTUAL_OPENID_SERVER_HOSTNAME, + }; + } + + return { + issuer: { + name: process.env.ACTUAL_OPENID_PROVIDER_NAME, + authorization_endpoint: process.env.ACTUAL_OPENID_AUTHORIZATION_ENDPOINT, + token_endpoint: process.env.ACTUAL_OPENID_TOKEN_ENDPOINT, + userinfo_endpoint: process.env.ACTUAL_OPENID_USERINFO_ENDPOINT, + }, + client_id: process.env.ACTUAL_OPENID_CLIENT_ID, + client_secret: process.env.ACTUAL_OPENID_CLIENT_SECRET, + server_hostname: process.env.ACTUAL_OPENID_SERVER_HOSTNAME, + }; +})(); + +const openId = finalConfig?.openId ?? 
openIdEnv; +if (!openId) { + console.error('missing openid configuration'); + process.exit(1); +} + +const active = accountDb.getActiveLoginMethod(); +if (active === 'openid') { + console.log('openid already enabled'); + process.exit(0); +} + +try { + if (accountDb.needsBootstrap()) { + const result = await accountDb.bootstrap({ openId }); + if (result?.error && result.error !== 'already-bootstrapped') { + console.error(`bootstrap failed: ${result.error}`); + process.exit(1); + } + } else { + const result = await accountDb.enableOpenID({ openId }); + if (result?.error) { + console.error(`enable openid failed: ${result.error}`); + process.exit(1); + } + } + + console.log('openid bootstrap complete'); +} catch (err) { + console.error('openid bootstrap error:', err); + process.exit(1); +} diff --git a/services/finance/scripts/finance_secrets_ensure.py b/services/finance/scripts/finance_secrets_ensure.py new file mode 100644 index 0000000..198ffe6 --- /dev/null +++ b/services/finance/scripts/finance_secrets_ensure.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +import base64 +import json +import os +import secrets +import ssl +import sys +import urllib.error +import urllib.request +from pathlib import Path + + +def read_file(path: Path) -> str: + if not path.exists(): + return "" + return path.read_text(encoding="utf-8").strip() + + +def require_value(label: str, value: str) -> None: + if not value: + raise RuntimeError(f"missing {label}") + + +def http_json(method: str, url: str, headers=None, payload=None, context=None): + data = None + if payload is not None: + data = json.dumps(payload).encode() + req = urllib.request.Request(url, data=data, headers=headers or {}, method=method) + with urllib.request.urlopen(req, timeout=15, context=context) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + + +def k8s_context() -> ssl.SSLContext: + ca_path = 
Path("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if ca_path.exists(): + return ssl.create_default_context(cafile=str(ca_path)) + return ssl.create_default_context() + + +def k8s_api_url(path: str) -> str: + host = os.environ.get("KUBERNETES_SERVICE_HOST") + port = os.environ.get("KUBERNETES_SERVICE_PORT", "443") + if not host: + raise RuntimeError("missing kubernetes service host") + return f"https://{host}:{port}{path}" + + +def k8s_get_secret(namespace: str, name: str, token: str): + try: + _, body = http_json( + "GET", + k8s_api_url(f"/api/v1/namespaces/{namespace}/secrets/{name}"), + headers={"Authorization": f"Bearer {token}"}, + context=k8s_context(), + ) + except urllib.error.HTTPError as exc: + if exc.code == 404: + return None + raise + return body + + +def k8s_create_secret(namespace: str, name: str, token: str, string_data: dict): + payload = { + "apiVersion": "v1", + "kind": "Secret", + "metadata": {"name": name, "namespace": namespace}, + "type": "Opaque", + "stringData": string_data, + } + try: + status, _ = http_json( + "POST", + k8s_api_url(f"/api/v1/namespaces/{namespace}/secrets"), + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + }, + payload=payload, + context=k8s_context(), + ) + except urllib.error.HTTPError as exc: + if exc.code == 409: + return + raise + if status not in (200, 201): + raise RuntimeError(f"k8s secret create failed for {name} (status {status})") + + +def decode_secret_value(value: str) -> str: + if not value: + return "" + return base64.b64decode(value.encode()).decode("utf-8") + + +def vault_login(vault_addr: str, role: str, jwt: str) -> str: + status, body = http_json( + "POST", + f"{vault_addr}/v1/auth/kubernetes/login", + headers={"Content-Type": "application/json"}, + payload={"jwt": jwt, "role": role}, + ) + if status != 200 or not body: + raise RuntimeError("vault login failed") + token = body.get("auth", {}).get("client_token") + if not token: + raise 
RuntimeError("vault login returned no token") + return token + + +def vault_read(vault_addr: str, token: str, path: str): + try: + status, body = http_json( + "GET", + f"{vault_addr}/v1/kv/data/atlas/{path}", + headers={"X-Vault-Token": token}, + ) + except urllib.error.HTTPError as exc: + if exc.code == 404: + return {} + raise + if status != 200 or not body: + return {} + return body.get("data", {}).get("data", {}) or {} + + +def vault_write(vault_addr: str, token: str, path: str, data: dict): + payload = {"data": data} + status, _ = http_json( + "POST", + f"{vault_addr}/v1/kv/data/atlas/{path}", + headers={"X-Vault-Token": token, "Content-Type": "application/json"}, + payload=payload, + ) + if status not in (200, 204): + raise RuntimeError(f"vault write failed for {path} (status {status})") + + +def ensure_firefly_db(vault_addr: str, token: str): + base = Path("/secrets/firefly-db") + host = read_file(base / "DB_HOST") or read_file(base / "DB_HOSTNAME") + port = read_file(base / "DB_PORT") + db_name = read_file(base / "DB_DATABASE") or read_file(base / "DB_NAME") + user = read_file(base / "DB_USERNAME") or read_file(base / "DB_USER") + password = read_file(base / "DB_PASSWORD") or read_file(base / "DB_PASS") + + require_value("firefly-db/DB_HOST", host) + require_value("firefly-db/DB_PORT", port) + require_value("firefly-db/DB_DATABASE", db_name) + require_value("firefly-db/DB_USERNAME", user) + require_value("firefly-db/DB_PASSWORD", password) + + vault_write( + vault_addr, + token, + "finance/firefly-db", + { + "DB_HOST": host, + "DB_PORT": port, + "DB_DATABASE": db_name, + "DB_USERNAME": user, + "DB_PASSWORD": password, + }, + ) + + +def ensure_firefly_secrets(vault_addr: str, token: str): + current = vault_read(vault_addr, token, "finance/firefly-secrets") + app_key = current.get("APP_KEY") + if not app_key: + app_key = "base64:" + base64.b64encode(secrets.token_bytes(32)).decode() + cron_token = current.get("STATIC_CRON_TOKEN") + if not cron_token: + 
cron_token = secrets.token_urlsafe(32) + vault_write( + vault_addr, + token, + "finance/firefly-secrets", + {"APP_KEY": app_key, "STATIC_CRON_TOKEN": cron_token}, + ) + + +def ensure_actual_db(vault_addr: str, token: str): + base = Path("/secrets/actualbudget-db") + if not base.exists(): + return + host = read_file(base / "DB_HOST") or read_file(base / "DB_HOSTNAME") + port = read_file(base / "DB_PORT") + db_name = read_file(base / "DB_DATABASE") or read_file(base / "DB_NAME") + user = read_file(base / "DB_USERNAME") or read_file(base / "DB_USER") + password = read_file(base / "DB_PASSWORD") or read_file(base / "DB_PASS") + + if not any([host, port, db_name, user, password]): + return + + require_value("actualbudget-db/DB_HOST", host) + require_value("actualbudget-db/DB_PORT", port) + require_value("actualbudget-db/DB_DATABASE", db_name) + require_value("actualbudget-db/DB_USERNAME", user) + require_value("actualbudget-db/DB_PASSWORD", password) + + vault_write( + vault_addr, + token, + "finance/actual-db", + { + "DB_HOST": host, + "DB_PORT": port, + "DB_DATABASE": db_name, + "DB_USERNAME": user, + "DB_PASSWORD": password, + }, + ) + + +def ensure_actual_encryption(vault_addr: str, token: str, sa_token: str): + namespace = os.environ.get("FINANCE_NAMESPACE", "finance") + secret_name = os.environ.get("ACTUAL_BUDGET_PVC_NAME", "actual-budget-data-encrypted") + if not sa_token: + raise RuntimeError("missing service account token for k8s") + + vault_data = vault_read(vault_addr, token, "finance/actual-encryption") + vault_key = vault_data.get("CRYPTO_KEY_VALUE", "") + + k8s_secret = k8s_get_secret(namespace, secret_name, sa_token) + k8s_key = "" + if k8s_secret: + data = k8s_secret.get("data", {}) or {} + k8s_key = decode_secret_value(data.get("CRYPTO_KEY_VALUE", "")) + + if vault_key and k8s_key and vault_key != k8s_key: + raise RuntimeError("actual encryption key mismatch between vault and k8s") + + key = vault_key or k8s_key + provider = "secret" + if not key: + key 
= secrets.token_urlsafe(48) + vault_write( + vault_addr, + token, + "finance/actual-encryption", + {"CRYPTO_KEY_VALUE": key, "CRYPTO_KEY_PROVIDER": provider}, + ) + elif not vault_key: + vault_write( + vault_addr, + token, + "finance/actual-encryption", + {"CRYPTO_KEY_VALUE": key, "CRYPTO_KEY_PROVIDER": provider}, + ) + + if not k8s_secret: + k8s_create_secret( + namespace, + secret_name, + sa_token, + {"CRYPTO_KEY_VALUE": key, "CRYPTO_KEY_PROVIDER": provider}, + ) + + +def main() -> int: + vault_addr = os.environ.get("VAULT_ADDR", "http://vault.vault.svc.cluster.local:8200") + vault_role = os.environ.get("VAULT_ROLE", "finance-secrets") + sa_token = read_file(Path("/var/run/secrets/kubernetes.io/serviceaccount/token")) + if not sa_token: + raise RuntimeError("missing service account token") + + token = vault_login(vault_addr, vault_role, sa_token) + ensure_firefly_db(vault_addr, token) + ensure_firefly_secrets(vault_addr, token) + ensure_actual_db(vault_addr, token) + ensure_actual_encryption(vault_addr, token, sa_token) + print("finance secrets ensured") + return 0 + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception as exc: + print(f"finance secrets ensure failed: {exc}", file=sys.stderr) + sys.exit(1) diff --git a/services/finance/scripts/firefly_user_sync.php b/services/finance/scripts/firefly_user_sync.php new file mode 100644 index 0000000..4036c3d --- /dev/null +++ b/services/finance/scripts/firefly_user_sync.php @@ -0,0 +1,114 @@ +#!/usr/bin/env php +make(ConsoleKernel::class); +$kernel->bootstrap(); + +try { + FireflyConfig::set('single_user_mode', true); +} catch (Throwable $exc) { + error_line('failed to enforce single_user_mode: '.$exc->getMessage()); +} + +$repository = $app->make(UserRepositoryInterface::class); + +$existing_user = User::where('email', $email)->first(); +$first_user = User::count() == 0; + +if (!$existing_user) { + $existing_user = User::create( + [ + 'email' => $email, + 'password' => bcrypt($password), + 
'blocked' => false, + 'blocked_code' => null, + ] + ); + + if ($first_user) { + $role = Role::where('name', 'owner')->first(); + if ($role) { + $existing_user->roles()->attach($role); + } + } + + log_line(sprintf('created firefly user %s', $email)); +} else { + log_line(sprintf('updating firefly user %s', $email)); +} + +$existing_user->blocked = false; +$existing_user->blocked_code = null; +$existing_user->save(); + +$repository->changePassword($existing_user, $password); +CreatesGroupMemberships::createGroupMembership($existing_user); + +log_line('firefly user sync complete'); diff --git a/services/finance/serviceaccount.yaml b/services/finance/serviceaccount.yaml new file mode 100644 index 0000000..3d18681 --- /dev/null +++ b/services/finance/serviceaccount.yaml @@ -0,0 +1,12 @@ +# services/finance/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: finance-vault + namespace: finance +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: finance-secrets-ensure + namespace: finance diff --git a/services/gitea/deployment.yaml b/services/gitea/deployment.yaml index ed2cd63..9dc0c87 100644 --- a/services/gitea/deployment.yaml +++ b/services/gitea/deployment.yaml @@ -20,34 +20,56 @@ spec: metadata: labels: app: gitea + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "gitea" + vault.hashicorp.com/agent-inject-secret-gitea-db-secret__password: "kv/data/atlas/gitea/gitea-db-secret" + vault.hashicorp.com/agent-inject-template-gitea-db-secret__password: | + {{ with secret "kv/data/atlas/gitea/gitea-db-secret" }} + {{ .Data.data.password }} + {{ end }} + vault.hashicorp.com/agent-inject-secret-gitea-secret__SECRET_KEY: "kv/data/atlas/gitea/gitea-secret" + vault.hashicorp.com/agent-inject-template-gitea-secret__SECRET_KEY: | + {{ with secret "kv/data/atlas/gitea/gitea-secret" }} + {{ .Data.data.SECRET_KEY }} + {{ end }} + 
vault.hashicorp.com/agent-inject-secret-gitea-secret__INTERNAL_TOKEN: "kv/data/atlas/gitea/gitea-secret" + vault.hashicorp.com/agent-inject-template-gitea-secret__INTERNAL_TOKEN: | + {{ with secret "kv/data/atlas/gitea/gitea-secret" }} + {{ .Data.data.INTERNAL_TOKEN }} + {{ end }} + vault.hashicorp.com/agent-inject-secret-gitea-oidc__client_id: "kv/data/atlas/gitea/gitea-oidc" + vault.hashicorp.com/agent-inject-template-gitea-oidc__client_id: | + {{ with secret "kv/data/atlas/gitea/gitea-oidc" }} + {{ .Data.data.client_id }} + {{ end }} + vault.hashicorp.com/agent-inject-secret-gitea-oidc__client_secret: "kv/data/atlas/gitea/gitea-oidc" + vault.hashicorp.com/agent-inject-template-gitea-oidc__client_secret: | + {{ with secret "kv/data/atlas/gitea/gitea-oidc" }} + {{ .Data.data.client_secret }} + {{ end }} + vault.hashicorp.com/agent-inject-secret-gitea-oidc__openid_auto_discovery_url: "kv/data/atlas/gitea/gitea-oidc" + vault.hashicorp.com/agent-inject-template-gitea-oidc__openid_auto_discovery_url: | + {{ with secret "kv/data/atlas/gitea/gitea-oidc" }} + {{ .Data.data.openid_auto_discovery_url }} + {{ end }} spec: + serviceAccountName: gitea-vault initContainers: - name: configure-oidc image: gitea/gitea:1.23 securityContext: runAsUser: 1000 runAsGroup: 1000 - env: - - name: CLIENT_ID - valueFrom: - secretKeyRef: - name: gitea-oidc - key: client_id - - name: CLIENT_SECRET - valueFrom: - secretKeyRef: - name: gitea-oidc - key: client_secret - - name: DISCOVERY_URL - valueFrom: - secretKeyRef: - name: gitea-oidc - key: openid_auto_discovery_url command: - - /bin/bash + - /bin/sh - -c - | set -euo pipefail + CLIENT_ID="$(tr -d '\r\n' &2 + fi else echo "Creating keycloak auth source" - $BIN -c "$APPINI" admin auth add-oauth \ + if ! 
$BIN -c "$APPINI" admin auth add-oauth \ --name keycloak \ --provider openidConnect \ --key "$CLIENT_ID" \ @@ -82,7 +106,9 @@ spec: --required-claim-value "" \ --group-claim-name groups \ --admin-group admin \ - --skip-local-2fa + --skip-local-2fa; then + echo "OIDC create failed; continuing without blocking startup" >&2 + fi fi volumeMounts: - name: gitea-data @@ -107,6 +133,14 @@ spec: containers: - name: gitea image: gitea/gitea:1.23 + command: ["/bin/sh", "-c"] + args: + - | + set -euo pipefail + export GITEA__security__SECRET_KEY="$(tr -d '\r\n' &2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/harbor/secretproviderclass.yaml b/services/harbor/secretproviderclass.yaml new file mode 100644 index 0000000..03fef95 --- /dev/null +++ b/services/harbor/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/harbor/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: harbor-vault + namespace: harbor +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "harbor" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/harbor" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/harbor/serviceaccount.yaml b/services/harbor/serviceaccount.yaml new file mode 100644 index 0000000..46bb816 --- /dev/null +++ 
b/services/harbor/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/harbor/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: harbor-vault-sync + namespace: harbor diff --git a/services/harbor/vault-sync-deployment.yaml b/services/harbor/vault-sync-deployment.yaml new file mode 100644 index 0000000..11aae09 --- /dev/null +++ b/services/harbor/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/harbor/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: harbor-vault-sync + namespace: harbor +spec: + replicas: 1 + selector: + matchLabels: + app: harbor-vault-sync + template: + metadata: + labels: + app: harbor-vault-sync + spec: + serviceAccountName: harbor-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: harbor-vault diff --git a/services/health/config/nginx-main.conf b/services/health/config/nginx-main.conf new file mode 100644 index 0000000..81a5e1f --- /dev/null +++ b/services/health/config/nginx-main.conf @@ -0,0 +1,22 @@ +worker_processes auto; +pid /tmp/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + access_log /dev/stdout main; + error_log /dev/stderr warn; + + sendfile on; + keepalive_timeout 65; + + include /etc/nginx/conf.d/*.conf; +} diff --git a/services/health/config/nginx.conf b/services/health/config/nginx.conf new file mode 100644 index 0000000..b1ee8d4 --- /dev/null +++ b/services/health/config/nginx.conf @@ -0,0 +1,36 @@ +upstream wger { + server 
127.0.0.1:8000; +} + +server { + listen 8080; + + client_body_temp_path /tmp/client_body 1 2; + proxy_temp_path /tmp/proxy 1 2; + fastcgi_temp_path /tmp/fastcgi 1 2; + uwsgi_temp_path /tmp/uwsgi 1 2; + scgi_temp_path /tmp/scgi 1 2; + + location = /api/v2/register { + return 404; + } + + location / { + proxy_pass http://wger; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_set_header X-Forwarded-Host $host; + proxy_redirect off; + } + + location /static/ { + alias /wger/static/; + } + + location /media/ { + alias /wger/media/; + } + + client_max_body_size 100M; +} diff --git a/services/health/kustomization.yaml b/services/health/kustomization.yaml new file mode 100644 index 0000000..4dccf8c --- /dev/null +++ b/services/health/kustomization.yaml @@ -0,0 +1,25 @@ +# services/health/kustomization.yaml +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: health +resources: + - namespace.yaml + - serviceaccount.yaml + - portal-rbac.yaml + - wger-media-pvc.yaml + - wger-static-pvc.yaml + - wger-admin-ensure-cronjob.yaml + - wger-user-sync-cronjob.yaml + - wger-deployment.yaml + - wger-service.yaml + - wger-ingress.yaml +generatorOptions: + disableNameSuffixHash: true +configMapGenerator: + - name: wger-nginx-config + files: + - default.conf=config/nginx.conf + - nginx.conf=config/nginx-main.conf + - name: wger-user-sync-script + files: + - wger_user_sync.py=scripts/wger_user_sync.py diff --git a/services/health/namespace.yaml b/services/health/namespace.yaml new file mode 100644 index 0000000..71d6fff --- /dev/null +++ b/services/health/namespace.yaml @@ -0,0 +1,5 @@ +# services/health/namespace.yaml +apiVersion: v1 +kind: Namespace +metadata: + name: health diff --git a/services/health/portal-rbac.yaml b/services/health/portal-rbac.yaml new file mode 100644 index 0000000..cd9acd1 --- /dev/null +++ b/services/health/portal-rbac.yaml @@ -0,0 +1,31 @@ 
+# services/health/portal-rbac.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: bstein-dev-home-wger-user-sync + namespace: health +rules: + - apiGroups: ["batch"] + resources: ["cronjobs"] + verbs: ["get"] + resourceNames: ["wger-user-sync"] + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: bstein-dev-home-wger-user-sync + namespace: health +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: bstein-dev-home-wger-user-sync +subjects: + - kind: ServiceAccount + name: bstein-dev-home + namespace: bstein-dev-home diff --git a/services/health/scripts/wger_user_sync.py b/services/health/scripts/wger_user_sync.py new file mode 100644 index 0000000..4963c79 --- /dev/null +++ b/services/health/scripts/wger_user_sync.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import os +import sys + +import django + + +def _env(name: str, default: str = "") -> str: + value = os.getenv(name, default) + return value.strip() if isinstance(value, str) else "" + + +def _setup_django() -> None: + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.main") + django.setup() + + +def _set_default_gym(user) -> None: + try: + from wger.gym.models import GymConfig + except Exception: + return + + try: + config = GymConfig.objects.first() + except Exception: + return + + if not config or not getattr(config, "default_gym", None): + return + + profile = getattr(user, "userprofile", None) + if not profile or getattr(profile, "gym", None): + return + + profile.gym = config.default_gym + profile.save() + + +def _ensure_profile(user) -> None: + profile = getattr(user, "userprofile", None) + if not profile: + return + if hasattr(profile, "email_verified") and not profile.email_verified: + profile.email_verified = True + if 
hasattr(profile, "is_temporary") and profile.is_temporary: + profile.is_temporary = False + profile.save() + + +def _ensure_admin(username: str, password: str, email: str) -> None: + from django.contrib.auth.models import User + + if not username or not password: + raise RuntimeError("admin username/password missing") + + user, created = User.objects.get_or_create(username=username) + if created: + user.is_active = True + if not user.is_staff: + user.is_staff = True + if email: + user.email = email + user.set_password(password) + user.save() + + _ensure_profile(user) + _set_default_gym(user) + print(f"ensured admin user {username}") + + +def _ensure_user(username: str, password: str, email: str) -> None: + from django.contrib.auth.models import User + + if not username or not password: + raise RuntimeError("username/password missing") + + user, created = User.objects.get_or_create(username=username) + if created: + user.is_active = True + if email and user.email != email: + user.email = email + user.set_password(password) + user.save() + + _ensure_profile(user) + _set_default_gym(user) + action = "created" if created else "updated" + print(f"{action} user {username}") + + +def main() -> int: + admin_user = _env("WGER_ADMIN_USERNAME") + admin_password = _env("WGER_ADMIN_PASSWORD") + admin_email = _env("WGER_ADMIN_EMAIL") + + username = _env("WGER_USERNAME") or _env("ONLY_USERNAME") + password = _env("WGER_PASSWORD") + email = _env("WGER_EMAIL") + + if not any([admin_user and admin_password, username and password]): + print("no admin or user payload provided; exiting") + return 0 + + _setup_django() + + if admin_user and admin_password: + _ensure_admin(admin_user, admin_password, admin_email) + + if username and password: + _ensure_user(username, password, email) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/services/health/serviceaccount.yaml b/services/health/serviceaccount.yaml new file mode 100644 index 0000000..78046ba --- 
/dev/null +++ b/services/health/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/health/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: health-vault-sync + namespace: health diff --git a/services/health/wger-admin-ensure-cronjob.yaml b/services/health/wger-admin-ensure-cronjob.yaml new file mode 100644 index 0000000..db178a3 --- /dev/null +++ b/services/health/wger-admin-ensure-cronjob.yaml @@ -0,0 +1,119 @@ +# services/health/wger-admin-ensure-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: wger-admin-ensure + namespace: health + labels: + atlas.bstein.dev/glue: "true" +spec: + schedule: "15 3 * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "health" + vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db" + vault.hashicorp.com/agent-inject-template-wger-env: | + {{ with secret "kv/data/atlas/health/wger-db" }} + export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}" + export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}" + export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}" + export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}" + export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)" + {{ end }} + {{ with secret "kv/data/atlas/health/wger-secrets" }} + export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)" + export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)" + {{ end }} + {{ with secret "kv/data/atlas/health/wger-admin" }} + export WGER_ADMIN_USERNAME="$(cat /vault/secrets/wger-admin-username)" + export WGER_ADMIN_PASSWORD="$(cat /vault/secrets/wger-admin-password)" + {{ end }} + vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db" + 
vault.hashicorp.com/agent-inject-template-wger-db-password: | + {{- with secret "kv/data/atlas/health/wger-db" -}} + {{ .Data.data.DJANGO_DB_PASSWORD }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-secret-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SECRET_KEY }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-signing-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SIGNING_KEY }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-admin-username: "kv/data/atlas/health/wger-admin" + vault.hashicorp.com/agent-inject-template-wger-admin-username: | + {{- with secret "kv/data/atlas/health/wger-admin" -}} + {{ .Data.data.username }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-admin-password: "kv/data/atlas/health/wger-admin" + vault.hashicorp.com/agent-inject-template-wger-admin-password: | + {{- with secret "kv/data/atlas/health/wger-admin" -}} + {{ .Data.data.password }} + {{- end -}} + spec: + serviceAccountName: health-vault-sync + restartPolicy: Never + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: ensure + image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5 + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . 
/vault/secrets/wger-env + exec python3 /scripts/wger_user_sync.py + env: + - name: SITE_URL + value: https://health.bstein.dev + - name: TIME_ZONE + value: Etc/UTC + - name: TZ + value: Etc/UTC + - name: DJANGO_DEBUG + value: "False" + - name: DJANGO_DB_ENGINE + value: django.db.backends.postgresql + - name: DJANGO_CACHE_BACKEND + value: django.core.cache.backends.locmem.LocMemCache + - name: DJANGO_CACHE_LOCATION + value: wger-cache + volumeMounts: + - name: wger-user-sync-script + mountPath: /scripts + readOnly: true + volumes: + - name: wger-user-sync-script + configMap: + name: wger-user-sync-script + defaultMode: 0555 diff --git a/services/health/wger-deployment.yaml b/services/health/wger-deployment.yaml new file mode 100644 index 0000000..56a29a4 --- /dev/null +++ b/services/health/wger-deployment.yaml @@ -0,0 +1,254 @@ +# services/health/wger-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: wger + namespace: health +spec: + replicas: 1 + revisionHistoryLimit: 3 + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0 + maxUnavailable: 1 + selector: + matchLabels: + app: wger + template: + metadata: + labels: + app: wger + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "health" + vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db" + vault.hashicorp.com/agent-inject-template-wger-env: | + {{ with secret "kv/data/atlas/health/wger-db" }} + export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}" + export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}" + export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}" + export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}" + export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)" + {{ end }} + {{ with secret "kv/data/atlas/health/wger-secrets" }} + export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)" + export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)" + {{ end }} + 
vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db" + vault.hashicorp.com/agent-inject-template-wger-db-password: | + {{- with secret "kv/data/atlas/health/wger-db" -}} + {{ .Data.data.DJANGO_DB_PASSWORD }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-secret-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SECRET_KEY }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-signing-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SIGNING_KEY }} + {{- end -}} + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi4"] + securityContext: + fsGroup: 1000 + fsGroupChangePolicy: OnRootMismatch + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + serviceAccountName: health-vault-sync + initContainers: + - name: init-storage + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - | + set -e + mkdir -p /wger/static /wger/media + chown -R 1000:1000 /wger + securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: wger-static + mountPath: /wger/static + - name: wger-media + mountPath: /wger/media + containers: + - name: wger + image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5 + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . 
/vault/secrets/wger-env + exec /home/wger/entrypoint.sh + ports: + - name: app + containerPort: 8000 + env: + - name: SITE_URL + value: https://health.bstein.dev + - name: CSRF_TRUSTED_ORIGINS + value: https://health.bstein.dev + - name: X_FORWARDED_PROTO_HEADER_SET + value: "true" + - name: NUMBER_OF_PROXIES + value: "1" + - name: TIME_ZONE + value: Etc/UTC + - name: TZ + value: Etc/UTC + - name: DJANGO_DEBUG + value: "False" + - name: DJANGO_PERFORM_MIGRATIONS + value: "True" + - name: DJANGO_DB_ENGINE + value: django.db.backends.postgresql + - name: DJANGO_CACHE_BACKEND + value: django.core.cache.backends.locmem.LocMemCache + - name: DJANGO_CACHE_LOCATION + value: wger-cache + - name: DJANGO_CACHE_TIMEOUT + value: "3600" + - name: ALLOW_REGISTRATION + value: "False" + - name: ALLOW_GUEST_USERS + value: "False" + - name: ALLOW_UPLOAD_VIDEOS + value: "False" + - name: USE_CELERY + value: "False" + - name: SYNC_EXERCISES_CELERY + value: "False" + - name: SYNC_INGREDIENTS_CELERY + value: "False" + - name: SYNC_EXERCISE_IMAGES_CELERY + value: "False" + - name: SYNC_EXERCISE_VIDEOS_CELERY + value: "False" + - name: CACHE_API_EXERCISES_CELERY + value: "False" + - name: DOWNLOAD_INGREDIENTS_FROM + value: "None" + - name: ENABLE_EMAIL + value: "False" + volumeMounts: + - name: wger-static + mountPath: /home/wger/static + - name: wger-media + mountPath: /home/wger/media + startupProbe: + httpGet: + path: /api/v2/version/ + port: app + failureThreshold: 60 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /api/v2/version/ + port: app + initialDelaySeconds: 20 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /api/v2/version/ + port: app + initialDelaySeconds: 45 + periodSeconds: 20 + timeoutSeconds: 3 + failureThreshold: 6 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: "1" + memory: 2Gi + - name: nginx + image: 
nginx:1.27.5-alpine@sha256:65645c7bb6a0661892a8b03b89d0743208a18dd2f3f17a54ef4b76fb8e2f2a10 + imagePullPolicy: IfNotPresent + command: + - nginx + args: + - -g + - daemon off; + ports: + - name: http + containerPort: 8080 + securityContext: + runAsUser: 101 + runAsGroup: 101 + allowPrivilegeEscalation: false + volumeMounts: + - name: wger-nginx-config + mountPath: /etc/nginx/conf.d/default.conf + subPath: default.conf + - name: wger-nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: wger-static + mountPath: /wger/static + - name: wger-media + mountPath: /wger/media + startupProbe: + httpGet: + path: /api/v2/version/ + port: http + failureThreshold: 60 + periodSeconds: 10 + timeoutSeconds: 3 + readinessProbe: + httpGet: + path: /api/v2/version/ + port: http + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 3 + failureThreshold: 6 + livenessProbe: + httpGet: + path: /api/v2/version/ + port: http + initialDelaySeconds: 30 + periodSeconds: 20 + timeoutSeconds: 3 + failureThreshold: 6 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 200m + memory: 256Mi + volumes: + - name: wger-static + persistentVolumeClaim: + claimName: wger-static + - name: wger-media + persistentVolumeClaim: + claimName: wger-media + - name: wger-nginx-config + configMap: + name: wger-nginx-config + defaultMode: 0444 diff --git a/services/health/wger-ingress.yaml b/services/health/wger-ingress.yaml new file mode 100644 index 0000000..c868fbf --- /dev/null +++ b/services/health/wger-ingress.yaml @@ -0,0 +1,26 @@ +# services/health/wger-ingress.yaml +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: wger + namespace: health + annotations: + kubernetes.io/ingress.class: traefik + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" + cert-manager.io/cluster-issuer: letsencrypt +spec: + tls: + - hosts: ["health.bstein.dev"] + secretName: wger-tls + rules: + - host: 
health.bstein.dev + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: wger + port: + number: 80 diff --git a/services/health/wger-media-pvc.yaml b/services/health/wger-media-pvc.yaml new file mode 100644 index 0000000..c31d81b --- /dev/null +++ b/services/health/wger-media-pvc.yaml @@ -0,0 +1,12 @@ +# services/health/wger-media-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: wger-media + namespace: health +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria + resources: + requests: + storage: 20Gi diff --git a/services/health/wger-service.yaml b/services/health/wger-service.yaml new file mode 100644 index 0000000..d01101a --- /dev/null +++ b/services/health/wger-service.yaml @@ -0,0 +1,13 @@ +# services/health/wger-service.yaml +apiVersion: v1 +kind: Service +metadata: + name: wger + namespace: health +spec: + selector: + app: wger + ports: + - name: http + port: 80 + targetPort: http diff --git a/services/health/wger-static-pvc.yaml b/services/health/wger-static-pvc.yaml new file mode 100644 index 0000000..2c6506a --- /dev/null +++ b/services/health/wger-static-pvc.yaml @@ -0,0 +1,12 @@ +# services/health/wger-static-pvc.yaml +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: wger-static + namespace: health +spec: + accessModes: ["ReadWriteOnce"] + storageClassName: asteria + resources: + requests: + storage: 5Gi diff --git a/services/health/wger-user-sync-cronjob.yaml b/services/health/wger-user-sync-cronjob.yaml new file mode 100644 index 0000000..de2dbb9 --- /dev/null +++ b/services/health/wger-user-sync-cronjob.yaml @@ -0,0 +1,106 @@ +# services/health/wger-user-sync-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: wger-user-sync + namespace: health + labels: + atlas.bstein.dev/glue: "true" +spec: + schedule: "0 5 * * *" + suspend: true + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 0 
+ template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "health" + vault.hashicorp.com/agent-inject-secret-wger-env: "kv/data/atlas/health/wger-db" + vault.hashicorp.com/agent-inject-template-wger-env: | + {{ with secret "kv/data/atlas/health/wger-db" }} + export DJANGO_DB_HOST="{{ .Data.data.DJANGO_DB_HOST }}" + export DJANGO_DB_PORT="{{ .Data.data.DJANGO_DB_PORT }}" + export DJANGO_DB_DATABASE="{{ .Data.data.DJANGO_DB_DATABASE }}" + export DJANGO_DB_USER="{{ .Data.data.DJANGO_DB_USER }}" + export DJANGO_DB_PASSWORD="$(cat /vault/secrets/wger-db-password)" + {{ end }} + {{ with secret "kv/data/atlas/health/wger-secrets" }} + export SECRET_KEY="$(cat /vault/secrets/wger-secret-key)" + export SIGNING_KEY="$(cat /vault/secrets/wger-signing-key)" + {{ end }} + vault.hashicorp.com/agent-inject-secret-wger-db-password: "kv/data/atlas/health/wger-db" + vault.hashicorp.com/agent-inject-template-wger-db-password: | + {{- with secret "kv/data/atlas/health/wger-db" -}} + {{ .Data.data.DJANGO_DB_PASSWORD }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-secret-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-secret-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SECRET_KEY }} + {{- end -}} + vault.hashicorp.com/agent-inject-secret-wger-signing-key: "kv/data/atlas/health/wger-secrets" + vault.hashicorp.com/agent-inject-template-wger-signing-key: | + {{- with secret "kv/data/atlas/health/wger-secrets" -}} + {{ .Data.data.SIGNING_KEY }} + {{- end -}} + spec: + serviceAccountName: health-vault-sync + restartPolicy: Never + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: ["rpi5"] + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + 
values: ["rpi4"] + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: sync + image: wger/server@sha256:710588b78af4e0aa0b4d8a8061e4563e16eae80eeaccfe7f9e0d9cbdd7f0cbc5 + imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . /vault/secrets/wger-env + exec python3 /scripts/wger_user_sync.py + env: + - name: SITE_URL + value: https://health.bstein.dev + - name: TIME_ZONE + value: Etc/UTC + - name: TZ + value: Etc/UTC + - name: DJANGO_DEBUG + value: "False" + - name: DJANGO_DB_ENGINE + value: django.db.backends.postgresql + - name: DJANGO_CACHE_BACKEND + value: django.core.cache.backends.locmem.LocMemCache + - name: DJANGO_CACHE_LOCATION + value: wger-cache + volumeMounts: + - name: wger-user-sync-script + mountPath: /scripts + readOnly: true + volumes: + - name: wger-user-sync-script + configMap: + name: wger-user-sync-script + defaultMode: 0555 diff --git a/services/jellyfin/deployment.yaml b/services/jellyfin/deployment.yaml index 1177a06..fe84743 100644 --- a/services/jellyfin/deployment.yaml +++ b/services/jellyfin/deployment.yaml @@ -20,7 +20,14 @@ spec: metadata: labels: app: jellyfin + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "pegasus" + vault.hashicorp.com/agent-inject-secret-ldap-config.xml: "kv/data/atlas/pegasus/jellyfin-ldap-config" + vault.hashicorp.com/agent-inject-template-ldap-config.xml: | + {{- with secret "kv/data/atlas/pegasus/jellyfin-ldap-config" -}}{{ index .Data.data "ldap-config.xml" }}{{- end -}} spec: + serviceAccountName: pegasus-vault-sync # Clean up any lingering OIDC artifacts and strip the injected script tag initContainers: - name: strip-oidc @@ -70,15 +77,28 @@ spec: mountPath: /config affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - 
key: kubernetes.io/hostname + operator: In + values: + - titan-22 + - weight: 80 + preference: + matchExpressions: - key: kubernetes.io/hostname operator: In values: - titan-20 - titan-21 - - titan-22 + - weight: 60 + preference: + matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: - titan-24 securityContext: runAsUser: 1000 @@ -90,6 +110,10 @@ spec: - name: jellyfin image: docker.io/jellyfin/jellyfin:10.11.5 imagePullPolicy: IfNotPresent + command: + - /entrypoint.sh + args: + - /jellyfin/jellyfin ports: - name: http containerPort: 8096 @@ -104,6 +128,8 @@ spec: value: "65532" - name: UMASK value: "002" + - name: VAULT_COPY_FILES + value: /vault/secrets/ldap-config.xml:/config/plugins/configurations/LDAP-Auth.xml resources: limits: nvidia.com/gpu.shared: 1 @@ -114,12 +140,11 @@ spec: cpu: "500m" memory: 1Gi volumeMounts: + - name: jellyfin-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh - name: config mountPath: /config - # Override LDAP plugin configuration from a secret to avoid embedding credentials in the PVC. 
- - name: ldap-config - mountPath: /config/plugins/configurations/LDAP-Auth.xml - subPath: ldap-config.xml - name: cache mountPath: /cache - name: media @@ -143,20 +168,17 @@ spec: allowPrivilegeEscalation: false readOnlyRootFilesystem: false volumes: + - name: jellyfin-vault-entrypoint + configMap: + name: jellyfin-vault-entrypoint + defaultMode: 493 - name: web-root emptyDir: {} - name: config persistentVolumeClaim: claimName: jellyfin-config-astreae - name: cache - persistentVolumeClaim: - claimName: jellyfin-cache-astreae + emptyDir: {} - name: media persistentVolumeClaim: claimName: jellyfin-media-asteria-new - - name: ldap-config - secret: - secretName: jellyfin-ldap-config - items: - - key: ldap-config.xml - path: ldap-config.xml diff --git a/services/jellyfin/ingress.yaml b/services/jellyfin/ingress.yaml index 85a4d1d..9164b9f 100644 --- a/services/jellyfin/ingress.yaml +++ b/services/jellyfin/ingress.yaml @@ -6,6 +6,8 @@ metadata: namespace: jellyfin annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: ingressClassName: traefik rules: diff --git a/services/jellyfin/kustomization.yaml b/services/jellyfin/kustomization.yaml index 51566b8..041b27c 100644 --- a/services/jellyfin/kustomization.yaml +++ b/services/jellyfin/kustomization.yaml @@ -7,3 +7,9 @@ resources: - service.yaml - deployment.yaml - ingress.yaml +generatorOptions: + disableNameSuffixHash: true +configMapGenerator: + - name: jellyfin-vault-entrypoint + files: + - vault-entrypoint.sh=scripts/vault-entrypoint.sh diff --git a/services/jellyfin/scripts/vault-entrypoint.sh b/services/jellyfin/scripts/vault-entrypoint.sh new file mode 100644 index 0000000..fa3b791 --- /dev/null +++ b/services/jellyfin/scripts/vault-entrypoint.sh @@ -0,0 +1,34 @@ +#!/bin/sh +set -eu + +if [ -n "${VAULT_ENV_FILE:-}" ]; then + if [ -f "${VAULT_ENV_FILE}" ]; then + # shellcheck disable=SC1090 + . 
"${VAULT_ENV_FILE}" + else + echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/jenkins/configmap-jcasc.yaml b/services/jenkins/configmap-jcasc.yaml index 2c188db..ac26350 100644 --- a/services/jenkins/configmap-jcasc.yaml +++ b/services/jenkins/configmap-jcasc.yaml @@ -139,6 +139,25 @@ data: } } } + pipelineJob('titan-iac-quality-gate') { + triggers { + scm('H/5 * * * *') + } + definition { + cpsScm { + scm { + git { + remote { + url('https://scm.bstein.dev/bstein/titan-iac.git') + credentials('gitea-pat') + } + branches('*/feature/vault-consumption') + } + } + scriptPath('ci/Jenkinsfile.titan-iac') + } + } + } base.yaml: | jenkins: disableRememberMe: false diff --git a/services/jenkins/deployment.yaml b/services/jenkins/deployment.yaml index ec749e8..e846a8e 100644 --- a/services/jenkins/deployment.yaml +++ b/services/jenkins/deployment.yaml @@ -17,6 +17,28 @@ spec: metadata: labels: app: jenkins + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "jenkins" + vault.hashicorp.com/agent-inject-secret-jenkins-env: "kv/data/atlas/jenkins/jenkins-oidc" + vault.hashicorp.com/agent-inject-template-jenkins-env: | + {{- with secret "kv/data/atlas/jenkins/jenkins-oidc" -}} + OIDC_CLIENT_ID={{ .Data.data.clientId }} + OIDC_CLIENT_SECRET={{ .Data.data.clientSecret }} + OIDC_AUTH_URL={{ .Data.data.authorizationUrl }} + OIDC_TOKEN_URL={{ .Data.data.tokenUrl }} + OIDC_USERINFO_URL={{ .Data.data.userInfoUrl }} + OIDC_LOGOUT_URL={{ .Data.data.logoutUrl }} + {{- end }} + 
{{- with secret "kv/data/atlas/jenkins/harbor-robot-creds" -}} + HARBOR_ROBOT_USERNAME={{ .Data.data.username }} + HARBOR_ROBOT_PASSWORD={{ .Data.data.password }} + {{- end }} + {{- with secret "kv/data/atlas/jenkins/gitea-pat" -}} + GITEA_PAT_USERNAME={{ .Data.data.username }} + GITEA_PAT_TOKEN={{ .Data.data.token }} + {{- end -}} + bstein.dev/restarted-at: "2026-01-19T00:25:00Z" spec: serviceAccountName: jenkins nodeSelector: @@ -63,6 +85,12 @@ spec: - name: jenkins image: jenkins/jenkins:2.528.3-jdk21 imagePullPolicy: IfNotPresent + command: + - /bin/sh + - -c + - | + set -e + exec env $(cat /vault/secrets/jenkins-env) /usr/bin/tini -- /usr/local/bin/jenkins.sh ports: - name: http containerPort: 8080 @@ -81,56 +109,6 @@ spec: value: "true" - name: OIDC_ISSUER value: "https://sso.bstein.dev/realms/atlas" - - name: OIDC_CLIENT_ID - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: clientId - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: clientSecret - - name: OIDC_AUTH_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: authorizationUrl - - name: OIDC_TOKEN_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: tokenUrl - - name: OIDC_USERINFO_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: userInfoUrl - - name: OIDC_LOGOUT_URL - valueFrom: - secretKeyRef: - name: jenkins-oidc - key: logoutUrl - - name: HARBOR_ROBOT_USERNAME - valueFrom: - secretKeyRef: - name: harbor-robot-creds - key: username - - name: HARBOR_ROBOT_PASSWORD - valueFrom: - secretKeyRef: - name: harbor-robot-creds - key: password - - name: GITEA_PAT_USERNAME - valueFrom: - secretKeyRef: - name: gitea-pat - key: username - - name: GITEA_PAT_TOKEN - valueFrom: - secretKeyRef: - name: gitea-pat - key: token resources: requests: cpu: 750m diff --git a/services/jenkins/ingress.yaml b/services/jenkins/ingress.yaml index e702c8c..611eae4 100644 --- a/services/jenkins/ingress.yaml +++ b/services/jenkins/ingress.yaml @@ -7,6 +7,7 @@ 
metadata: annotations: cert-manager.io/cluster-issuer: letsencrypt traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: ingressClassName: traefik tls: diff --git a/services/keycloak/actual-oidc-secret-ensure-job.yaml b/services/keycloak/actual-oidc-secret-ensure-job.yaml new file mode 100644 index 0000000..3dadb52 --- /dev/null +++ b/services/keycloak/actual-oidc-secret-ensure-job.yaml @@ -0,0 +1,48 @@ +# services/keycloak/actual-oidc-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: actual-oidc-secret-ensure-3 + namespace: sso +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + volumes: + - name: actual-oidc-secret-ensure-script + configMap: + name: actual-oidc-secret-ensure-script + defaultMode: 0555 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: node-role.kubernetes.io/worker + operator: Exists + containers: + - name: apply + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/scripts/actual_oidc_secret_ensure.sh"] + volumeMounts: + - name: actual-oidc-secret-ensure-script + mountPath: /scripts + readOnly: true diff 
--git a/services/keycloak/deployment.yaml b/services/keycloak/deployment.yaml index 48cf5e0..3d241c9 100644 --- a/services/keycloak/deployment.yaml +++ b/services/keycloak/deployment.yaml @@ -20,23 +20,43 @@ spec: metadata: labels: app: keycloak + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: + serviceAccountName: sso-vault + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: hardware - operator: In - values: ["rpi5","rpi4"] - - key: node-role.kubernetes.io/worker - operator: Exists - 
- matchExpressions: - - key: kubernetes.io/hostname - operator: In - values: ["titan-24"] preferredDuringSchedulingIgnoredDuringExecution: - - weight: 90 + - weight: 100 preference: matchExpressions: - key: hardware @@ -69,28 +89,16 @@ spec: - name: keycloak image: quay.io/keycloak/keycloak:26.0.7 imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] args: - - start + - >- + . /vault/secrets/keycloak-env.sh + && exec /opt/keycloak/bin/kc.sh start env: - name: KC_DB value: postgres - name: KC_DB_URL_HOST value: postgres-service.postgres.svc.cluster.local - - name: KC_DB_URL_DATABASE - valueFrom: - secretKeyRef: - name: keycloak-db - key: POSTGRES_DATABASE - - name: KC_DB_USERNAME - valueFrom: - secretKeyRef: - name: keycloak-db - key: POSTGRES_USER - - name: KC_DB_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-db - key: POSTGRES_PASSWORD - name: KC_DB_SCHEMA value: public - name: KC_HOSTNAME @@ -115,16 +123,6 @@ spec: value: "true" - name: KC_METRICS_ENABLED value: "true" - - name: KEYCLOAK_ADMIN - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: KC_EVENTS_LISTENERS value: jboss-logging,mailu-http - name: KC_SPI_EVENTS_LISTENER_MAILU-HTTP_ENDPOINT diff --git a/services/keycloak/harbor-oidc-secret-ensure-job.yaml b/services/keycloak/harbor-oidc-secret-ensure-job.yaml new file mode 100644 index 0000000..8eac50d --- /dev/null +++ b/services/keycloak/harbor-oidc-secret-ensure-job.yaml @@ -0,0 +1,48 @@ +# services/keycloak/harbor-oidc-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: harbor-oidc-secret-ensure-9 + namespace: sso +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + 
vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + volumes: + - name: harbor-oidc-secret-ensure-script + configMap: + name: harbor-oidc-secret-ensure-script + defaultMode: 0555 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: node-role.kubernetes.io/worker + operator: Exists + containers: + - name: apply + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/scripts/harbor_oidc_secret_ensure.sh"] + volumeMounts: + - name: harbor-oidc-secret-ensure-script + mountPath: /scripts + readOnly: true diff --git a/services/keycloak/ingress.yaml b/services/keycloak/ingress.yaml index 39f6cb0..9efb18e 100644 --- a/services/keycloak/ingress.yaml +++ b/services/keycloak/ingress.yaml @@ -6,6 +6,8 @@ metadata: namespace: sso annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: ingressClassName: traefik rules: diff --git a/services/keycloak/kustomization.yaml b/services/keycloak/kustomization.yaml index ddb4ab2..6030a82 100644 --- a/services/keycloak/kustomization.yaml +++ b/services/keycloak/kustomization.yaml @@ -5,11 +5,14 @@ namespace: sso resources: - namespace.yaml - pvc.yaml + - serviceaccount.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml + - vault-sync-deployment.yaml - deployment.yaml - realm-settings-job.yaml + 
- portal-admin-client-secret-ensure-job.yaml - portal-e2e-client-job.yaml - - portal-e2e-client-secret-sync-rbac.yaml - - portal-e2e-client-secret-sync-cronjob.yaml - portal-e2e-target-client-job.yaml - portal-e2e-token-exchange-permissions-job.yaml - portal-e2e-token-exchange-test-job.yaml @@ -19,6 +22,9 @@ resources: - mas-secrets-ensure-job.yaml - synapse-oidc-secret-ensure-job.yaml - logs-oidc-secret-ensure-job.yaml + - harbor-oidc-secret-ensure-job.yaml + - vault-oidc-secret-ensure-job.yaml + - actual-oidc-secret-ensure-job.yaml - service.yaml - ingress.yaml generatorOptions: @@ -28,6 +34,12 @@ configMapGenerator: files: - test_portal_token_exchange.py=scripts/tests/test_portal_token_exchange.py - test_keycloak_execute_actions_email.py=scripts/tests/test_keycloak_execute_actions_email.py - - name: portal-e2e-client-secret-sync-script + - name: harbor-oidc-secret-ensure-script files: - - sso_portal_e2e_client_secret_sync.sh=scripts/sso_portal_e2e_client_secret_sync.sh + - harbor_oidc_secret_ensure.sh=scripts/harbor_oidc_secret_ensure.sh + - name: vault-oidc-secret-ensure-script + files: + - vault_oidc_secret_ensure.sh=scripts/vault_oidc_secret_ensure.sh + - name: actual-oidc-secret-ensure-script + files: + - actual_oidc_secret_ensure.sh=scripts/actual_oidc_secret_ensure.sh diff --git a/services/keycloak/ldap-federation-job.yaml b/services/keycloak/ldap-federation-job.yaml index 9650468..303fd9f 100644 --- a/services/keycloak/ldap-federation-job.yaml +++ b/services/keycloak/ldap-federation-job.yaml @@ -2,11 +2,41 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-ldap-federation-5 + name: keycloak-ldap-federation-11 namespace: sso spec: backoffLimit: 2 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + 
vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: affinity: nodeAffinity: @@ -19,6 +49,7 @@ spec: - key: node-role.kubernetes.io/worker operator: Exists restartPolicy: OnFailure + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -28,25 +59,10 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: LDAP_URL value: ldap://openldap.sso.svc.cluster.local:389 - name: LDAP_BIND_DN value: cn=admin,dc=bstein,dc=dev - - name: LDAP_BIND_PASSWORD - valueFrom: - secretKeyRef: - name: openldap-admin - key: LDAP_ADMIN_PASSWORD - name: LDAP_USERS_DN value: 
ou=users,dc=bstein,dc=dev - name: LDAP_GROUPS_DN @@ -55,6 +71,7 @@ spec: args: - | set -euo pipefail + . /vault/secrets/keycloak-env.sh python - <<'PY' import json import os @@ -360,3 +377,3 @@ spec: except Exception as e: print(f"WARNING: LDAP cleanup failed (continuing): {e}") PY diff --git a/services/keycloak/logs-oidc-secret-ensure-job.yaml b/services/keycloak/logs-oidc-secret-ensure-job.yaml index 11d48f9..14e80df 100644 --- a/services/keycloak/logs-oidc-secret-ensure-job.yaml +++ b/services/keycloak/logs-oidc-secret-ensure-job.yaml @@ -2,24 +2,49 @@ apiVersion: batch/v1 kind: Job metadata: - name: logs-oidc-secret-ensure-2 + name: logs-oidc-secret-ensure-10 namespace: sso spec: backoffLimit: 0 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} spec: serviceAccountName: mas-secrets-ensure restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] containers: - name: apply - image: alpine:3.20 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - apk add --no-cache curl jq kubectl openssl
>/dev/null - + . /vault/secrets/keycloak-admin-env.sh KC_URL="http://keycloak.sso.svc.cluster.local" ACCESS_TOKEN="" for attempt in 1 2 3 4 5; do @@ -73,31 +98,35 @@ spec: exit 1 fi - if kubectl -n logging get secret oauth2-proxy-logs-oidc >/dev/null 2>&1; then - current_cookie="$(kubectl -n logging get secret oauth2-proxy-logs-oidc -o jsonpath='{.data.cookie_secret}' 2>/dev/null || true)" - if [ -n "${current_cookie}" ]; then - decoded="$(printf '%s' "${current_cookie}" | base64 -d 2>/dev/null || true)" - length="$(printf '%s' "${decoded}" | wc -c | tr -d ' ')" - if [ "${length}" = "16" ] || [ "${length}" = "24" ] || [ "${length}" = "32" ]; then - exit 0 - fi - fi + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-sso-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 fi - COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')" - kubectl -n logging create secret generic oauth2-proxy-logs-oidc \ - --from-literal=client_id="logs" \ - --from-literal=client_secret="${CLIENT_SECRET}" \ - --from-literal=cookie_secret="${COOKIE_SECRET}" \ - --dry-run=client -o yaml | kubectl -n logging apply -f - >/dev/null - env: - - name: KEYCLOAK_ADMIN - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password + COOKIE_SECRET="$(curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/logging/oauth2-proxy-logs-oidc" | jq -r '.data.data.cookie_secret // empty')" + if [ -n "${COOKIE_SECRET}" ]; then + length="$(printf '%s' 
"${COOKIE_SECRET}" | wc -c | tr -d ' ')" + if [ "${length}" != "16" ] && [ "${length}" != "24" ] && [ "${length}" != "32" ]; then + COOKIE_SECRET="" + fi + fi + if [ -z "${COOKIE_SECRET}" ]; then + COOKIE_SECRET="$(openssl rand -hex 16 | tr -d '\n')" + fi + + payload="$(jq -nc \ + --arg client_id "logs" \ + --arg client_secret "${CLIENT_SECRET}" \ + --arg cookie_secret "${COOKIE_SECRET}" \ + '{data:{client_id:$client_id,client_secret:$client_secret,cookie_secret:$cookie_secret}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/logging/oauth2-proxy-logs-oidc" >/dev/null + + diff --git a/services/keycloak/mas-secrets-ensure-job.yaml b/services/keycloak/mas-secrets-ensure-job.yaml index b0951cf..24c9e04 100644 --- a/services/keycloak/mas-secrets-ensure-job.yaml +++ b/services/keycloak/mas-secrets-ensure-job.yaml @@ -4,33 +4,58 @@ kind: ServiceAccount metadata: name: mas-secrets-ensure namespace: sso +imagePullSecrets: + - name: harbor-regcred --- apiVersion: batch/v1 kind: Job metadata: - name: mas-secrets-ensure-13 + name: mas-secrets-ensure-21 namespace: sso spec: backoffLimit: 0 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} spec: serviceAccountName: mas-secrets-ensure restartPolicy: Never + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true"
volumes: - name: work emptyDir: {} initContainers: - name: generate - image: alpine:3.20 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . /vault/secrets/keycloak-admin-env.sh umask 077 - apk add --no-cache curl openssl jq >/dev/null KC_URL="http://keycloak.sso.svc.cluster.local" + for attempt in 1 2 3 4 5 6 7 8 9 10; do + if curl -fsS "${KC_URL}/realms/master" >/dev/null 2>&1; then + break + fi + echo "Waiting for Keycloak to be reachable (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done ACCESS_TOKEN="" for attempt in 1 2 3 4 5; do TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ @@ -50,14 +75,31 @@ spec: echo "Failed to fetch Keycloak admin token" >&2 exit 1 fi - CLIENT_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ - "$KC_URL/admin/realms/atlas/clients?clientId=othrys-mas" | jq -r '.[0].id' 2>/dev/null || true)" + CLIENT_ID="" + for attempt in 1 2 3 4 5; do + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=othrys-mas" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + if [ -n "$CLIENT_ID" ] && [ "$CLIENT_ID" != "null" ]; then + break + fi + echo "Keycloak client lookup failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then echo "Keycloak client othrys-mas not found" >&2 exit 1 fi - CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ - "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" + CLIENT_SECRET="" + for attempt in 1 2 3 4 5; do + CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" + if [ -n "$CLIENT_SECRET" ] && [ "$CLIENT_SECRET" 
!= "null" ]; then + break + fi + echo "Keycloak client secret lookup failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) + done if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then echo "Keycloak client secret not found" >&2 exit 1 @@ -68,40 +110,44 @@ spec: openssl rand -hex 32 | tr -d '\n' > /work/matrix_shared_secret openssl genpkey -algorithm RSA -pkeyopt rsa_keygen_bits:4096 -out /work/rsa_key >/dev/null 2>&1 chmod 0644 /work/* - env: - - name: KEYCLOAK_ADMIN - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password volumeMounts: - name: work mountPath: /work containers: - name: apply - image: bitnami/kubectl:latest + image: registry.bstein.dev/bstein/kubectl:1.35.0 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - if kubectl -n comms get secret mas-secrets-runtime >/dev/null 2>&1; then - kubectl -n comms get secret mas-secrets-runtime -o jsonpath='{.data.encryption}' | base64 -d 2>/dev/null > /tmp/encryption.current || true - current_len="$(wc -c < /tmp/encryption.current | tr -d ' ')" - if [ "${current_len}" = "64" ] && grep -Eq '^[0-9a-fA-F]{64}$' /tmp/encryption.current; then + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-sso-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 + fi + + existing="$(curl -sS -H "X-Vault-Token: ${vault_token}" \ + "${vault_addr}/v1/kv/data/atlas/comms/mas-secrets-runtime" | jq -r '.data.data.encryption // empty')" + if [ -n "${existing}" ]; then + current_len="$(printf 
'%s' "${existing}" | wc -c | tr -d ' ')" + if [ "${current_len}" = "64" ] && printf '%s' "${existing}" | grep -Eq '^[0-9a-fA-F]{64}$'; then exit 0 fi fi - kubectl -n comms create secret generic mas-secrets-runtime \ - --from-file=encryption=/work/encryption \ - --from-file=matrix_shared_secret=/work/matrix_shared_secret \ - --from-file=keycloak_client_secret=/work/keycloak_client_secret \ - --from-file=rsa_key=/work/rsa_key \ - --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null + + payload="$(jq -nc \ + --arg encryption "$(cat /work/encryption)" \ + --arg matrix_shared_secret "$(cat /work/matrix_shared_secret)" \ + --arg keycloak_client_secret "$(cat /work/keycloak_client_secret)" \ + --arg rsa_key "$(cat /work/rsa_key)" \ + '{data:{encryption:$encryption, matrix_shared_secret:$matrix_shared_secret, keycloak_client_secret:$keycloak_client_secret, rsa_key:$rsa_key}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/comms/mas-secrets-runtime" >/dev/null volumeMounts: - name: work mountPath: /work diff --git a/services/keycloak/portal-admin-client-secret-ensure-job.yaml b/services/keycloak/portal-admin-client-secret-ensure-job.yaml new file mode 100644 index 0000000..90dd4b7 --- /dev/null +++ b/services/keycloak/portal-admin-client-secret-ensure-job.yaml @@ -0,0 +1,217 @@ +# services/keycloak/portal-admin-client-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: keycloak-portal-admin-secret-ensure-4 + namespace: sso +spec: + backoffLimit: 0 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN_USER="{{ 
.Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }} + export PORTAL_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + spec: + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault + containers: + - name: configure + image: python:3.11-alpine + env: + - name: KEYCLOAK_SERVER + value: http://keycloak.sso.svc.cluster.local + - name: KEYCLOAK_REALM + value: atlas + - name: PORTAL_ADMIN_CLIENT_ID + value: bstein-dev-home-admin + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . /vault/secrets/keycloak-env.sh + python - <<'PY' + import json + import os + import urllib.parse + import urllib.error + import urllib.request + + base_url = os.environ["KEYCLOAK_SERVER"].rstrip("/") + realm = os.environ["KEYCLOAK_REALM"] + admin_user = os.environ["KEYCLOAK_ADMIN_USER"] + admin_password = os.environ["KEYCLOAK_ADMIN_PASSWORD"] + client_id = os.environ["PORTAL_ADMIN_CLIENT_ID"] + client_secret = os.environ["PORTAL_ADMIN_CLIENT_SECRET"] + + def http_json(method: str, url: str, token: str, payload=None): + data = None + headers = {"Authorization": f"Bearer {token}"} + if payload is not None: + data = json.dumps(payload).encode() + headers["Content-Type"] = "application/json" + req = urllib.request.Request(url, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + body = resp.read() + if not body: + return resp.status, None + return resp.status, json.loads(body.decode()) + except urllib.error.HTTPError as exc: + raw = exc.read() + if not raw: + return 
exc.code, None + try: + return exc.code, json.loads(raw.decode()) + except Exception: + return exc.code, {"raw": raw.decode(errors="replace")} + + def get_admin_token() -> str: + token_data = urllib.parse.urlencode( + { + "grant_type": "password", + "client_id": "admin-cli", + "username": admin_user, + "password": admin_password, + } + ).encode() + req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + body = json.loads(resp.read().decode()) + except urllib.error.HTTPError as exc: + raw = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={raw}") + return body["access_token"] + + token = get_admin_token() + status, clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId={urllib.parse.quote(client_id)}", + token, + ) + if status != 200 or not isinstance(clients, list) or not clients: + raise SystemExit(f"Unable to find client {client_id!r} (status={status})") + + client_uuid = None + for item in clients: + if isinstance(item, dict) and item.get("clientId") == client_id: + client_uuid = item.get("id") + break + if not client_uuid: + raise SystemExit(f"Client {client_id!r} has no id") + + status, client_rep = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + ) + if status != 200 or not isinstance(client_rep, dict): + raise SystemExit(f"Unable to fetch client representation (status={status})") + + updated = False + if client_rep.get("serviceAccountsEnabled") is not True: + client_rep["serviceAccountsEnabled"] = True + updated = True + if client_rep.get("publicClient") is not False: + client_rep["publicClient"] = False + updated = True + if client_rep.get("secret") != client_secret: + client_rep["secret"] = client_secret + updated = True + + if 
updated: + status, resp = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}", + token, + client_rep, + ) + if status not in (200, 204): + raise SystemExit(f"Client update failed (status={status}) resp={resp}") + + # Ensure the portal admin service account can manage users. + status, svc_user = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{client_uuid}/service-account-user", + token, + ) + if status != 200 or not isinstance(svc_user, dict) or not svc_user.get("id"): + raise SystemExit(f"Unable to fetch service account user (status={status})") + svc_user_id = svc_user["id"] + + status, rm_clients = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients?clientId=realm-management", + token, + ) + if status != 200 or not isinstance(rm_clients, list) or not rm_clients: + raise SystemExit("Unable to find realm-management client") + rm_uuid = rm_clients[0].get("id") + if not rm_uuid: + raise SystemExit("realm-management client has no id") + + wanted_roles = ("query-users", "view-users", "manage-users", "impersonation") + role_reps = [] + for role_name in wanted_roles: + status, role = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/clients/{rm_uuid}/roles/{urllib.parse.quote(role_name)}", + token, + ) + if status != 200 or not isinstance(role, dict): + raise SystemExit(f"Unable to fetch role {role_name} (status={status})") + role_reps.append({"id": role.get("id"), "name": role.get("name")}) + + status, assigned = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users/{svc_user_id}/role-mappings/clients/{rm_uuid}", + token, + ) + assigned_names = set() + if status == 200 and isinstance(assigned, list): + for r in assigned: + if isinstance(r, dict) and r.get("name"): + assigned_names.add(r["name"]) + + missing = [r for r in role_reps if r.get("name") and r["name"] not in assigned_names] + if missing: + status, resp = http_json( + "POST", + 
f"{base_url}/admin/realms/{realm}/users/{svc_user_id}/role-mappings/clients/{rm_uuid}", + token, + missing, + ) + if status not in (200, 204): + raise SystemExit(f"Role mapping update failed (status={status}) resp={resp}") + + print(f"OK: ensured secret for {client_id}") + PY diff --git a/services/keycloak/portal-e2e-client-job.yaml b/services/keycloak/portal-e2e-client-job.yaml index 7f6c5dd..4e0c006 100644 --- a/services/keycloak/portal-e2e-client-job.yaml +++ b/services/keycloak/portal-e2e-client-job.yaml @@ -2,13 +2,58 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-client-2 + name: keycloak-portal-e2e-client-8 namespace: sso spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret 
"kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -17,30 +62,11 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - - name: PORTAL_E2E_CLIENT_ID - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_id - - name: PORTAL_E2E_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_secret command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . 
/vault/secrets/keycloak-env.sh python - <<'PY' import json import os @@ -245,3 +271,5 @@ spec: if status not in (200, 204): raise SystemExit(f"Role mapping update failed (status={status}) resp={resp}") PY + volumeMounts: + volumes: diff --git a/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml b/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml deleted file mode 100644 index 8bb7e55..0000000 --- a/services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# services/keycloak/portal-e2e-client-secret-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: portal-e2e-client-secret-sync - namespace: sso -spec: - schedule: "*/10 * * * *" - concurrencyPolicy: Forbid - successfulJobsHistoryLimit: 1 - failedJobsHistoryLimit: 3 - jobTemplate: - spec: - backoffLimit: 1 - template: - spec: - serviceAccountName: portal-e2e-client-secret-sync - restartPolicy: Never - containers: - - name: sync - image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 - command: ["/usr/bin/env", "bash"] - args: ["/scripts/sso_portal_e2e_client_secret_sync.sh"] - volumeMounts: - - name: script - mountPath: /scripts - readOnly: true - volumes: - - name: script - configMap: - name: portal-e2e-client-secret-sync-script - defaultMode: 0555 diff --git a/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml b/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml deleted file mode 100644 index e2d39bb..0000000 --- a/services/keycloak/portal-e2e-client-secret-sync-rbac.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# services/keycloak/portal-e2e-client-secret-sync-rbac.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: portal-e2e-client-secret-sync - namespace: sso ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: portal-e2e-client-secret-sync-source - namespace: sso -rules: - - apiGroups: [""] - resources: ["secrets"] - resourceNames: 
["portal-e2e-client"] - verbs: ["get"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: portal-e2e-client-secret-sync-source - namespace: sso -subjects: - - kind: ServiceAccount - name: portal-e2e-client-secret-sync - namespace: sso -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: portal-e2e-client-secret-sync-source diff --git a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml index 877dd55..35f79a6 100644 --- a/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml +++ b/services/keycloak/portal-e2e-execute-actions-email-test-job.yaml @@ -2,13 +2,58 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-execute-actions-email-5 + name: keycloak-portal-e2e-execute-actions-email-14 namespace: sso spec: backoffLimit: 3 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ 
.Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault containers: - name: test image: python:3.11-alpine @@ -17,20 +62,10 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: PORTAL_E2E_CLIENT_ID - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_id - - name: PORTAL_E2E_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_secret - name: E2E_PROBE_USERNAME - value: e2e-smtp-probe + value: robotuser - name: E2E_PROBE_EMAIL - value: robot@bstein.dev + value: robotuser@bstein.dev - name: EXECUTE_ACTIONS_CLIENT_ID value: bstein-dev-home - name: EXECUTE_ACTIONS_REDIRECT_URI @@ -38,7 +73,8 @@ spec: command: ["/bin/sh", "-c"] args: - | - set -euo pipefail + set -eu + . 
/vault/secrets/keycloak-env.sh python /scripts/test_keycloak_execute_actions_email.py volumeMounts: - name: tests diff --git a/services/keycloak/portal-e2e-target-client-job.yaml b/services/keycloak/portal-e2e-target-client-job.yaml index 45b3980..196b48b 100644 --- a/services/keycloak/portal-e2e-target-client-job.yaml +++ b/services/keycloak/portal-e2e-target-client-job.yaml @@ -2,13 +2,58 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-target-1 + name: keycloak-portal-e2e-target-7 namespace: sso spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data 
"apikey" }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -17,22 +62,13 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: TARGET_CLIENT_ID value: bstein-dev-home command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . /vault/secrets/keycloak-env.sh python - <<'PY' import json import os @@ -136,3 +172,5 @@ spec: print(f"OK: ensured token exchange enabled on client {target_client_id}") PY + volumeMounts: + volumes: diff --git a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml index 104d6f0..647b8f9 100644 --- a/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-permissions-job.yaml @@ -2,13 +2,58 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-token-exchange-permissions-5 + name: keycloak-portal-e2e-token-exchange-permissions-11 namespace: sso spec: backoffLimit: 6 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret 
"kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -17,16 +62,6 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: PORTAL_E2E_CLIENT_ID value: test-portal-e2e - name: TARGET_CLIENT_ID @@ -35,6 +70,7 @@ spec: args: - 
| set -euo pipefail + . /vault/secrets/keycloak-env.sh python - <<'PY' import json import os @@ -269,3 +305,4 @@ spec: print("OK: configured token exchange permissions for portal E2E client") PY + volumeMounts: diff --git a/services/keycloak/portal-e2e-token-exchange-test-job.yaml b/services/keycloak/portal-e2e-token-exchange-test-job.yaml index ab43303..edd7555 100644 --- a/services/keycloak/portal-e2e-token-exchange-test-job.yaml +++ b/services/keycloak/portal-e2e-token-exchange-test-job.yaml @@ -2,14 +2,59 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-portal-e2e-token-exchange-test-1 + name: keycloak-portal-e2e-token-exchange-test-7 namespace: sso spec: backoffLimit: 6 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + 
{{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: sso-vault containers: - name: test image: python:3.11-alpine @@ -26,20 +71,11 @@ spec: value: "300" - name: RETRY_INTERVAL_SECONDS value: "5" - - name: PORTAL_E2E_CLIENT_ID - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_id - - name: PORTAL_E2E_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: portal-e2e-client - key: client_secret command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . 
/vault/secrets/keycloak-env.sh python /scripts/test_portal_token_exchange.py volumeMounts: - name: tests diff --git a/services/keycloak/realm-settings-job.yaml b/services/keycloak/realm-settings-job.yaml index bdc816d..f680200 100644 --- a/services/keycloak/realm-settings-job.yaml +++ b/services/keycloak/realm-settings-job.yaml @@ -2,11 +2,41 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-realm-settings-16 + name: keycloak-realm-settings-32 namespace: sso spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: affinity: nodeAffinity: @@ 
-19,6 +49,7 @@ spec: - key: node-role.kubernetes.io/worker operator: Exists restartPolicy: Never + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -27,35 +58,27 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: KEYCLOAK_SMTP_HOST - value: mailu-front.mailu-mailserver.svc.cluster.local + value: mail.bstein.dev - name: KEYCLOAK_SMTP_PORT - value: "25" + value: "587" - name: KEYCLOAK_SMTP_FROM - value: no-reply@bstein.dev + value: no-reply-sso@bstein.dev - name: KEYCLOAK_SMTP_FROM_NAME value: Atlas SSO - name: KEYCLOAK_SMTP_REPLY_TO - value: no-reply@bstein.dev + value: no-reply-sso@bstein.dev - name: KEYCLOAK_SMTP_REPLY_TO_NAME value: Atlas SSO command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . /vault/secrets/keycloak-env.sh python - <<'PY' import json import os + import time import urllib.parse import urllib.error import urllib.request @@ -95,18 +118,27 @@ spec: "password": admin_password, } ).encode() - token_req = urllib.request.Request( - f"{base_url}/realms/master/protocol/openid-connect/token", - data=token_data, - headers={"Content-Type": "application/x-www-form-urlencoded"}, - method="POST", - ) - try: - with urllib.request.urlopen(token_req, timeout=10) as resp: - token_body = json.loads(resp.read().decode()) - except urllib.error.HTTPError as exc: - body = exc.read().decode(errors="replace") - raise SystemExit(f"Token request failed: status={exc.code} body={body}") + token_body = None + for attempt in range(1, 11): + token_req = urllib.request.Request( + f"{base_url}/realms/master/protocol/openid-connect/token", + data=token_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + method="POST", + ) + try: + with urllib.request.urlopen(token_req, 
timeout=10) as resp: + token_body = json.loads(resp.read().decode()) + break + except urllib.error.HTTPError as exc: + body = exc.read().decode(errors="replace") + raise SystemExit(f"Token request failed: status={exc.code} body={body}") + except urllib.error.URLError as exc: + if attempt == 10: + raise SystemExit(f"Token request failed after retries: {exc}") + time.sleep(attempt * 2) + if not token_body: + raise SystemExit("Token request failed without response") access_token = token_body["access_token"] # Update realm settings safely by fetching the full realm representation first. @@ -126,8 +158,10 @@ spec: "fromDisplayName": os.environ["KEYCLOAK_SMTP_FROM_NAME"], "replyTo": os.environ["KEYCLOAK_SMTP_REPLY_TO"], "replyToDisplayName": os.environ["KEYCLOAK_SMTP_REPLY_TO_NAME"], - "auth": "false", - "starttls": "false", + "user": os.environ["KEYCLOAK_SMTP_USER"], + "password": os.environ["KEYCLOAK_SMTP_PASSWORD"], + "auth": "true", + "starttls": "true", "ssl": "false", } ) @@ -186,6 +220,14 @@ spec: "permissions": {"view": ["admin"], "edit": ["admin"]}, "validations": {"length": {"max": 255}}, }, + { + "name": "mailu_enabled", + "displayName": "Atlas Mailbox Enabled", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 16}}, + }, { "name": "nextcloud_mail_primary_email", "displayName": "Nextcloud Mail Primary Email", @@ -210,6 +252,38 @@ spec: "permissions": {"view": ["admin"], "edit": ["admin"]}, "validations": {"length": {"max": 64}}, }, + { + "name": "wger_password", + "displayName": "Wger Password", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 255}}, + }, + { + "name": "wger_password_updated_at", + "displayName": "Wger Password Updated At", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": 
["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 64}}, + }, + { + "name": "firefly_password", + "displayName": "Firefly Password", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 255}}, + }, + { + "name": "firefly_password_updated_at", + "displayName": "Firefly Password Updated At", + "multivalued": False, + "annotations": {"group": "user-metadata"}, + "permissions": {"view": ["admin"], "edit": ["admin"]}, + "validations": {"length": {"max": 64}}, + }, ] def has_attr(name: str) -> bool: @@ -444,3 +518,4 @@ spec: f"Unexpected execution update response for identity-provider-redirector: {status}" ) PY + volumeMounts: diff --git a/services/keycloak/scripts/actual_oidc_secret_ensure.sh b/services/keycloak/scripts/actual_oidc_secret_ensure.sh new file mode 100644 index 0000000..deb019a --- /dev/null +++ b/services/keycloak/scripts/actual_oidc_secret_ensure.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env sh +set -euo pipefail + +. 
/vault/secrets/keycloak-admin-env.sh + +KC_URL="http://keycloak.sso.svc.cluster.local" +ACCESS_TOKEN="" +for attempt in 1 2 3 4 5 6 7 8 9 10; do + if curl -fsS "${KC_URL}/realms/master" >/dev/null 2>&1; then + break + fi + echo "Waiting for Keycloak to be reachable (attempt ${attempt})" >&2 + sleep $((attempt * 2)) +done +for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) +done +if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 +fi + +CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=actual-budget" || true)" +CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + create_payload='{"clientId":"actual-budget","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://budget.bstein.dev/openid/callback"],"webOrigins":["https://budget.bstein.dev"],"rootUrl":"https://budget.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${create_payload}" \ + "$KC_URL/admin/realms/atlas/clients")" + if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; 
then + echo "Keycloak client create failed (status ${status})" >&2 + exit 1 + fi + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=actual-budget" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" +fi + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client actual-budget not found" >&2 + exit 1 +fi + +CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" +if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 +fi + +vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" +vault_role="${VAULT_ROLE:-sso-secrets}" +jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" +login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" +vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" +if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 +fi + +payload="$(jq -nc \ + --arg client_id "actual-budget" \ + --arg client_secret "${CLIENT_SECRET}" \ + '{data:{ACTUAL_OPENID_CLIENT_ID:$client_id, ACTUAL_OPENID_CLIENT_SECRET:$client_secret}}')" + +curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/finance/actual-oidc" >/dev/null diff --git a/services/keycloak/scripts/harbor_oidc_secret_ensure.sh b/services/keycloak/scripts/harbor_oidc_secret_ensure.sh new file mode 100755 index 0000000..7187d34 --- /dev/null +++ b/services/keycloak/scripts/harbor_oidc_secret_ensure.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env sh +set -euo pipefail + +. 
/vault/secrets/keycloak-admin-env.sh + +KC_URL="http://keycloak.sso.svc.cluster.local" +ACCESS_TOKEN="" +for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) +done +if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 +fi + +CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=harbor" || true)" +CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + create_payload='{"clientId":"harbor","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://registry.bstein.dev/c/oidc/callback"],"webOrigins":["https://registry.bstein.dev"],"rootUrl":"https://registry.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${create_payload}" \ + "$KC_URL/admin/realms/atlas/clients")" + if [ "$status" != "201" ] && [ "$status" != "204" ]; then + echo "Keycloak client create failed (status ${status})" >&2 + exit 1 + fi + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=harbor" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" 
| jq -r '.[0].id' 2>/dev/null || true)" +fi + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client harbor not found" >&2 + exit 1 +fi + +SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)" +if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then + echo "Keycloak client scope groups not found" >&2 + exit 1 +fi + +DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)" +OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)" + +if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \ + && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + echo "Failed to attach groups client scope to harbor (status ${status})" >&2 + exit 1 + fi + fi +fi + +CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" +if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 +fi + +CONFIG_OVERWRITE_JSON="$(jq 
-nc \ + --arg auth_mode "oidc_auth" \ + --arg oidc_name "Keycloak" \ + --arg oidc_client_id "harbor" \ + --arg oidc_client_secret "${CLIENT_SECRET}" \ + --arg oidc_endpoint "https://sso.bstein.dev/realms/atlas" \ + --arg oidc_scope "openid,profile,email,groups" \ + --arg oidc_user_claim "preferred_username" \ + --arg oidc_groups_claim "groups" \ + --arg oidc_admin_group "admin" \ + --argjson oidc_auto_onboard true \ + --argjson oidc_verify_cert true \ + --argjson oidc_logout true \ + '{auth_mode:$auth_mode,oidc_name:$oidc_name,oidc_client_id:$oidc_client_id,oidc_client_secret:$oidc_client_secret,oidc_endpoint:$oidc_endpoint,oidc_scope:$oidc_scope,oidc_user_claim:$oidc_user_claim,oidc_groups_claim:$oidc_groups_claim,oidc_admin_group:$oidc_admin_group,oidc_auto_onboard:$oidc_auto_onboard,oidc_verify_cert:$oidc_verify_cert,oidc_logout:$oidc_logout}')" + +vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" +vault_role="${VAULT_ROLE:-sso-secrets}" +jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" +login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" +vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" +if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 +fi + +payload="$(jq -nc --arg value "${CONFIG_OVERWRITE_JSON}" '{data:{CONFIG_OVERWRITE_JSON:$value}}')" +curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/harbor/harbor-oidc" >/dev/null diff --git a/services/keycloak/scripts/sso_portal_e2e_client_secret_sync.sh b/services/keycloak/scripts/sso_portal_e2e_client_secret_sync.sh deleted file mode 100755 index bf944ca..0000000 --- a/services/keycloak/scripts/sso_portal_e2e_client_secret_sync.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash -set -euo pipefail - 
-SOURCE_NAMESPACE="${SOURCE_NAMESPACE:-sso}" -DEST_NAMESPACE="${DEST_NAMESPACE:-bstein-dev-home}" -SECRET_NAME="${SECRET_NAME:-portal-e2e-client}" - -client_id="$(kubectl -n "${SOURCE_NAMESPACE}" get secret "${SECRET_NAME}" -o jsonpath='{.data.client_id}')" -client_secret="$(kubectl -n "${SOURCE_NAMESPACE}" get secret "${SECRET_NAME}" -o jsonpath='{.data.client_secret}')" - -cat </dev/null -apiVersion: v1 -kind: Secret -metadata: - name: ${SECRET_NAME} -type: Opaque -data: - client_id: ${client_id} - client_secret: ${client_secret} -EOF diff --git a/services/keycloak/scripts/vault_oidc_secret_ensure.sh b/services/keycloak/scripts/vault_oidc_secret_ensure.sh new file mode 100755 index 0000000..a951cfa --- /dev/null +++ b/services/keycloak/scripts/vault_oidc_secret_ensure.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env sh +set -eu  # NOTE: 'set -o pipefail' is not POSIX sh; dash (a common /usr/bin/env sh) aborts on it + +. /vault/secrets/keycloak-admin-env.sh + +KC_URL="http://keycloak.sso.svc.cluster.local" +ACCESS_TOKEN="" +for attempt in 1 2 3 4 5 6 7 8 9 10; do + if curl -fsS "${KC_URL}/realms/master" >/dev/null 2>&1; then + break + fi + echo "Waiting for Keycloak to be reachable (attempt ${attempt})" >&2 + sleep $((attempt * 2)) +done +for attempt in 1 2 3 4 5; do + TOKEN_JSON="$(curl -sS -X POST "$KC_URL/realms/master/protocol/openid-connect/token" \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=password" \ + -d "client_id=admin-cli" \ + -d "username=${KEYCLOAK_ADMIN}" \ + -d "password=${KEYCLOAK_ADMIN_PASSWORD}" || true)" + ACCESS_TOKEN="$(echo "$TOKEN_JSON" | jq -r '.access_token' 2>/dev/null || true)" + if [ -n "$ACCESS_TOKEN" ] && [ "$ACCESS_TOKEN" != "null" ]; then + break + fi + echo "Keycloak token request failed (attempt ${attempt})" >&2 + sleep $((attempt * 2)) +done +if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then + echo "Failed to fetch Keycloak admin token" >&2 + exit 1 +fi + +CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + 
"$KC_URL/admin/realms/atlas/clients?clientId=vault-oidc" || true)" +CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + create_payload='{"clientId":"vault-oidc","enabled":true,"protocol":"openid-connect","publicClient":false,"standardFlowEnabled":true,"implicitFlowEnabled":false,"directAccessGrantsEnabled":false,"serviceAccountsEnabled":false,"redirectUris":["https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback","http://localhost:8250/oidc/callback"],"webOrigins":["https://secret.bstein.dev"],"rootUrl":"https://secret.bstein.dev","baseUrl":"/"}' + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + -H 'Content-Type: application/json' \ + -d "${create_payload}" \ + "$KC_URL/admin/realms/atlas/clients")" + if [ "$status" != "201" ] && [ "$status" != "204" ] && [ "$status" != "409" ]; then + echo "Keycloak client create failed (status ${status})" >&2 + exit 1 + fi + CLIENT_QUERY="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients?clientId=vault-oidc" || true)" + CLIENT_ID="$(echo "$CLIENT_QUERY" | jq -r '.[0].id' 2>/dev/null || true)" +fi + +if [ -z "$CLIENT_ID" ] || [ "$CLIENT_ID" = "null" ]; then + echo "Keycloak client vault-oidc not found" >&2 + exit 1 +fi + +SCOPE_ID="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/client-scopes?search=groups" | jq -r '.[] | select(.name=="groups") | .id' 2>/dev/null | head -n1 || true)" +if [ -z "$SCOPE_ID" ] || [ "$SCOPE_ID" = "null" ]; then + echo "Keycloak client scope groups not found" >&2 + exit 1 +fi + +DEFAULT_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/default-client-scopes" || true)" +OPTIONAL_SCOPES="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + 
"$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes" || true)" + +if ! echo "$DEFAULT_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1 \ + && ! echo "$OPTIONAL_SCOPES" | jq -e '.[] | select(.name=="groups")' >/dev/null 2>&1; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X PUT \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + status="$(curl -sS -o /dev/null -w "%{http_code}" -X POST \ + -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/optional-client-scopes/${SCOPE_ID}")" + if [ "$status" != "200" ] && [ "$status" != "201" ] && [ "$status" != "204" ]; then + echo "Failed to attach groups client scope to vault-oidc (status ${status})" >&2 + exit 1 + fi + fi +fi + +CLIENT_SECRET="$(curl -sS -H "Authorization: Bearer ${ACCESS_TOKEN}" \ + "$KC_URL/admin/realms/atlas/clients/${CLIENT_ID}/client-secret" | jq -r '.value' 2>/dev/null || true)" +if [ -z "$CLIENT_SECRET" ] || [ "$CLIENT_SECRET" = "null" ]; then + echo "Keycloak client secret not found" >&2 + exit 1 +fi + +vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" +vault_role="${VAULT_ROLE:-sso-secrets}" +jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" +login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" +vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" +if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 +fi + +payload="$(jq -nc \ + --arg discovery_url "https://sso.bstein.dev/realms/atlas" \ + --arg client_id "vault-oidc" \ + --arg client_secret "${CLIENT_SECRET}" \ + --arg default_role "admin" \ + --arg scopes "openid profile 
email groups" \ + --arg user_claim "preferred_username" \ + --arg groups_claim "groups" \ + --arg redirect_uris "https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback,http://localhost:8250/oidc/callback" \ + --arg bound_audiences "vault-oidc" \ + --arg admin_group "admin" \ + --arg admin_policies "default,vault-admin" \ + --arg dev_group "dev" \ + --arg dev_policies "default,dev-kv" \ + --arg user_group "dev" \ + --arg user_policies "default,dev-kv" \ + '{data:{discovery_url:$discovery_url,client_id:$client_id,client_secret:$client_secret,default_role:$default_role,scopes:$scopes,user_claim:$user_claim,groups_claim:$groups_claim,redirect_uris:$redirect_uris,bound_audiences:$bound_audiences,admin_group:$admin_group,admin_policies:$admin_policies,dev_group:$dev_group,dev_policies:$dev_policies,user_group:$user_group,user_policies:$user_policies}}')" +curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/vault/vault-oidc-config" >/dev/null diff --git a/services/keycloak/secretproviderclass.yaml b/services/keycloak/secretproviderclass.yaml new file mode 100644 index 0000000..86cebd2 --- /dev/null +++ b/services/keycloak/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/keycloak/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: sso-vault + namespace: sso +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "sso" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/sso" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/keycloak/serviceaccount.yaml b/services/keycloak/serviceaccount.yaml new file mode 100644 index 0000000..5f581c1 --- /dev/null +++ 
b/services/keycloak/serviceaccount.yaml @@ -0,0 +1,8 @@ +# services/keycloak/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sso-vault + namespace: sso +imagePullSecrets: + - name: harbor-regcred diff --git a/services/keycloak/synapse-oidc-secret-ensure-job.yaml b/services/keycloak/synapse-oidc-secret-ensure-job.yaml index 7486ced..e808e7e 100644 --- a/services/keycloak/synapse-oidc-secret-ensure-job.yaml +++ b/services/keycloak/synapse-oidc-secret-ensure-job.yaml @@ -2,24 +2,49 @@ apiVersion: batch/v1 kind: Job metadata: - name: synapse-oidc-secret-ensure-4 + name: synapse-oidc-secret-ensure-10 namespace: sso spec: backoffLimit: 0 ttlSecondsAfterFinished: 3600 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} spec: serviceAccountName: mas-secrets-ensure restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] containers: - name: apply - image: alpine:3.20 + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 command: ["/bin/sh", "-c"] args: - | set -euo pipefail - apk add --no-cache curl jq kubectl >/dev/null - + . 
/vault/secrets/keycloak-admin-env.sh KC_URL="http://keycloak.sso.svc.cluster.local" ACCESS_TOKEN="" for attempt in 1 2 3 4 5; do @@ -54,22 +79,19 @@ spec: exit 1 fi - existing="$(kubectl -n comms get secret synapse-oidc -o jsonpath='{.data.client-secret}' 2>/dev/null || true)" - if [ -n "${existing}" ]; then - exit 0 + vault_addr="${VAULT_ADDR:-http://vault.vault.svc.cluster.local:8200}" + vault_role="${VAULT_ROLE:-sso-secrets}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + login_payload="$(jq -nc --arg jwt "${jwt}" --arg role "${vault_role}" '{jwt:$jwt, role:$role}')" + vault_token="$(curl -sS --request POST --data "${login_payload}" \ + "${vault_addr}/v1/auth/kubernetes/login" | jq -r '.auth.client_token')" + if [ -z "${vault_token}" ] || [ "${vault_token}" = "null" ]; then + echo "vault login failed" >&2 + exit 1 fi - kubectl -n comms create secret generic synapse-oidc \ - --from-literal=client-secret="${CLIENT_SECRET}" \ - --dry-run=client -o yaml | kubectl -n comms apply -f - >/dev/null - env: - - name: KEYCLOAK_ADMIN - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password + payload="$(jq -nc --arg value "${CLIENT_SECRET}" '{data:{"client-secret":$value}}')" + curl -sS -X POST -H "X-Vault-Token: ${vault_token}" \ + -d "${payload}" "${vault_addr}/v1/kv/data/atlas/comms/synapse-oidc" >/dev/null diff --git a/services/keycloak/user-overrides-job.yaml b/services/keycloak/user-overrides-job.yaml index 43813ee..7623c84 100644 --- a/services/keycloak/user-overrides-job.yaml +++ b/services/keycloak/user-overrides-job.yaml @@ -2,11 +2,41 @@ apiVersion: batch/v1 kind: Job metadata: - name: keycloak-user-overrides-1 + name: keycloak-user-overrides-9 namespace: sso spec: backoffLimit: 0 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: 
"true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-keycloak-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-env.sh: | + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/keycloak-db" }} + export KC_DB_URL_DATABASE="{{ .Data.data.POSTGRES_DATABASE }}" + export KC_DB_USERNAME="{{ .Data.data.POSTGRES_USER }}" + export KC_DB_PASSWORD="{{ .Data.data.POSTGRES_PASSWORD }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/portal-e2e-client" }} + export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}" + export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}" + {{ end }} + {{ with secret "kv/data/atlas/sso/openldap-admin" }} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + export LDAP_BIND_PASSWORD="${LDAP_ADMIN_PASSWORD}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export KEYCLOAK_SMTP_USER="{{ index .Data.data "apikey" }}" + export KEYCLOAK_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: affinity: nodeAffinity: @@ -19,6 +49,7 @@ spec: - key: node-role.kubernetes.io/worker operator: Exists restartPolicy: Never + serviceAccountName: sso-vault containers: - name: configure image: python:3.11-alpine @@ -27,16 +58,6 @@ spec: value: http://keycloak.sso.svc.cluster.local - name: KEYCLOAK_REALM value: atlas - - name: KEYCLOAK_ADMIN_USER - valueFrom: - secretKeyRef: - name: keycloak-admin - key: username - - name: KEYCLOAK_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: keycloak-admin - key: password - name: OVERRIDE_USERNAME value: bstein - name: OVERRIDE_MAILU_EMAIL @@ -45,6 +66,7 @@ spec: args: - | set -euo pipefail + . 
/vault/secrets/keycloak-env.sh python - <<'PY' import json import os @@ -128,18 +150,62 @@ spec: if not isinstance(attrs, dict): attrs = {} existing = attrs.get("mailu_email") + needs_update = True if isinstance(existing, list) and existing and existing[0] == override_mailu_email: - raise SystemExit(0) + needs_update = False if isinstance(existing, str) and existing == override_mailu_email: - raise SystemExit(0) + needs_update = False - attrs["mailu_email"] = [override_mailu_email] - status, _ = http_json( - "PUT", - f"{base_url}/admin/realms/{realm}/users/{user_id}", - access_token, - {"attributes": attrs}, - ) - if status not in (200, 204): - raise SystemExit(f"Unexpected user update response: {status}") + if needs_update: + attrs["mailu_email"] = [override_mailu_email] + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/users/{user_id}", + access_token, + {"attributes": attrs}, + ) + if status not in (200, 204): + raise SystemExit(f"Unexpected user update response: {status}") + + # Ensure the user is in the admin and planka-users groups. 
+ def ensure_group(group_name: str) -> None: + status, groups = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/groups?search={urllib.parse.quote(group_name)}", + access_token, + ) + if status != 200 or not isinstance(groups, list): + raise SystemExit("Unable to fetch groups") + group_id = "" + for item in groups: + if isinstance(item, dict) and item.get("name") == group_name: + group_id = item.get("id") or "" + break + if not group_id: + raise SystemExit(f"{group_name} group not found") + status, memberships = http_json( + "GET", + f"{base_url}/admin/realms/{realm}/users/{user_id}/groups", + access_token, + ) + if status != 200 or not isinstance(memberships, list): + raise SystemExit("Unable to read user groups") + already = any( + isinstance(item, dict) and item.get("id") == group_id for item in memberships + ) + if already: + return + status, _ = http_json( + "PUT", + f"{base_url}/admin/realms/{realm}/users/{user_id}/groups/{group_id}", + access_token, + ) + if status not in (200, 204): + raise SystemExit( + f"Unexpected group update response for {group_name}: {status}" + ) + + for group in ("admin", "planka-users"): + ensure_group(group) PY diff --git a/services/keycloak/vault-oidc-secret-ensure-job.yaml b/services/keycloak/vault-oidc-secret-ensure-job.yaml new file mode 100644 index 0000000..3aa3ca5 --- /dev/null +++ b/services/keycloak/vault-oidc-secret-ensure-job.yaml @@ -0,0 +1,48 @@ +# services/keycloak/vault-oidc-secret-ensure-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: vault-oidc-secret-ensure-8 + namespace: sso +spec: + backoffLimit: 0 + ttlSecondsAfterFinished: 3600 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "sso-secrets" + vault.hashicorp.com/agent-inject-secret-keycloak-admin-env.sh: "kv/data/atlas/shared/keycloak-admin" + vault.hashicorp.com/agent-inject-template-keycloak-admin-env.sh: | + 
{{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KEYCLOAK_ADMIN="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_USER="{{ .Data.data.username }}" + export KEYCLOAK_ADMIN_PASSWORD="{{ .Data.data.password }}" + {{ end }} + spec: + serviceAccountName: mas-secrets-ensure + restartPolicy: Never + volumes: + - name: vault-oidc-secret-ensure-script + configMap: + name: vault-oidc-secret-ensure-script + defaultMode: 0555 + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + - key: node-role.kubernetes.io/worker + operator: Exists + containers: + - name: apply + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/scripts/vault_oidc_secret_ensure.sh"] + volumeMounts: + - name: vault-oidc-secret-ensure-script + mountPath: /scripts + readOnly: true diff --git a/services/keycloak/vault-serviceaccount.yaml b/services/keycloak/vault-serviceaccount.yaml new file mode 100644 index 0000000..79fa47c --- /dev/null +++ b/services/keycloak/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/keycloak/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: sso-vault-sync + namespace: sso diff --git a/services/keycloak/vault-sync-deployment.yaml b/services/keycloak/vault-sync-deployment.yaml new file mode 100644 index 0000000..a9afcd0 --- /dev/null +++ b/services/keycloak/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/keycloak/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sso-vault-sync + namespace: sso +spec: + replicas: 1 + selector: + matchLabels: + app: sso-vault-sync + template: + metadata: + labels: + app: sso-vault-sync + spec: + serviceAccountName: sso-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: 
vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: sso-vault diff --git a/services/logging/data-prepper-helmrelease.yaml b/services/logging/data-prepper-helmrelease.yaml index 8b27052..1c0bc45 100644 --- a/services/logging/data-prepper-helmrelease.yaml +++ b/services/logging/data-prepper-helmrelease.yaml @@ -7,6 +7,10 @@ metadata: spec: interval: 15m timeout: 10m + install: + disableWait: true + upgrade: + disableWait: true chart: spec: chart: data-prepper @@ -22,44 +26,14 @@ spec: repository: registry.bstein.dev/streaming/data-prepper tag: "2.8.0" imagePullSecrets: - - name: harbor-robot-pipeline + - name: harbor-regcred config: data-prepper-config.yaml: | ssl: false pipelineConfig: - enabled: true - config: - entry-pipeline: - delay: "100" - source: - otel_trace_source: - ssl: false - sink: - - pipeline: - name: "raw-pipeline" - - pipeline: - name: "service-map-pipeline" - raw-pipeline: - source: - pipeline: - name: "entry-pipeline" - processor: - - otel_traces: - sink: - - opensearch: - hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] - index_type: trace-analytics-raw - service-map-pipeline: - delay: "100" - source: - pipeline: - name: "entry-pipeline" - processor: - - service_map: - sink: - - opensearch: - hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] - index_type: trace-analytics-service-map + demoPipeline: false + enabled: false + existingSecret: data-prepper-pipeline resources: requests: cpu: "200m" diff --git a/services/logging/ingress.yaml b/services/logging/ingress.yaml index 7beeb9a..eafeb5d 100644 --- a/services/logging/ingress.yaml +++ b/services/logging/ingress.yaml @@ -6,6 +6,8 @@ metadata: namespace: logging annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: 
"true" spec: ingressClassName: traefik tls: diff --git a/services/logging/kustomization.yaml b/services/logging/kustomization.yaml index fe010f6..08c73a8 100644 --- a/services/logging/kustomization.yaml +++ b/services/logging/kustomization.yaml @@ -8,6 +8,8 @@ resources: - node-log-rotation-serviceaccount.yaml - node-image-gc-rpi4-serviceaccount.yaml - node-image-prune-rpi5-serviceaccount.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml - opensearch-pvc.yaml - opensearch-helmrelease.yaml - opensearch-dashboards-helmrelease.yaml @@ -22,6 +24,7 @@ resources: - node-image-gc-rpi4-daemonset.yaml - node-image-prune-rpi5-daemonset.yaml - oauth2-proxy.yaml + - vault-sync-deployment.yaml - ingress.yaml configMapGenerator: @@ -55,3 +58,11 @@ configMapGenerator: - seed.py=scripts/opensearch_observability_seed.py options: disableNameSuffixHash: true + +secretGenerator: + - name: data-prepper-pipeline + namespace: logging + files: + - pipelines.yaml=scripts/data_prepper_pipelines.yaml + options: + disableNameSuffixHash: true diff --git a/services/logging/oauth2-proxy.yaml b/services/logging/oauth2-proxy.yaml index ecebfa7..104351a 100644 --- a/services/logging/oauth2-proxy.yaml +++ b/services/logging/oauth2-proxy.yaml @@ -32,7 +32,20 @@ spec: metadata: labels: app: oauth2-proxy-logs + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "logging" + vault.hashicorp.com/agent-inject-secret-oidc-env: "kv/data/atlas/logging/oauth2-proxy-logs-oidc" + vault.hashicorp.com/agent-inject-template-oidc-env: | + {{- with secret "kv/data/atlas/logging/oauth2-proxy-logs-oidc" -}} + export OAUTH2_PROXY_CLIENT_ID="{{ .Data.data.client_id }}" + export OAUTH2_PROXY_CLIENT_SECRET="{{ .Data.data.client_secret }}" + export OAUTH2_PROXY_COOKIE_SECRET="{{ .Data.data.cookie_secret }}" + {{- end -}} spec: + serviceAccountName: logging-vault-sync + imagePullSecrets: + - name: harbor-regcred nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ 
-47,9 +60,11 @@ spec: - rpi4 containers: - name: oauth2-proxy - image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + image: registry.bstein.dev/tools/oauth2-proxy-vault:v7.6.0 imagePullPolicy: IfNotPresent + command: ["/entrypoint.sh"] args: + - /bin/oauth2-proxy - --provider=oidc - --redirect-url=https://logs.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas @@ -70,21 +85,8 @@ spec: - --skip-jwt-bearer-tokens=true - --cookie-domain=logs.bstein.dev env: - - name: OAUTH2_PROXY_CLIENT_ID - valueFrom: - secretKeyRef: - name: oauth2-proxy-logs-oidc - key: client_id - - name: OAUTH2_PROXY_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-logs-oidc - key: client_secret - - name: OAUTH2_PROXY_COOKIE_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-logs-oidc - key: cookie_secret + - name: VAULT_ENV_FILE + value: /vault/secrets/oidc-env ports: - containerPort: 4180 name: http diff --git a/services/logging/scripts/data_prepper_pipelines.yaml b/services/logging/scripts/data_prepper_pipelines.yaml new file mode 100644 index 0000000..5e244ff --- /dev/null +++ b/services/logging/scripts/data_prepper_pipelines.yaml @@ -0,0 +1,31 @@ +entry-pipeline: + delay: "100" + source: + otel_trace_source: + ssl: false + sink: + - pipeline: + name: "raw-pipeline" + - pipeline: + name: "service-map-pipeline" +raw-pipeline: + source: + pipeline: + name: "entry-pipeline" + processor: + - otel_traces: + sink: + - opensearch: + hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] + index_type: trace-analytics-raw +service-map-pipeline: + delay: "100" + source: + pipeline: + name: "entry-pipeline" + processor: + - service_map: + sink: + - opensearch: + hosts: ["http://opensearch-master.logging.svc.cluster.local:9200"] + index_type: trace-analytics-service-map diff --git a/services/logging/secretproviderclass.yaml b/services/logging/secretproviderclass.yaml new file mode 100644 index 0000000..f5db15e --- /dev/null +++ 
b/services/logging/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/logging/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: logging-vault + namespace: logging +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "logging" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/logging" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/logging/vault-serviceaccount.yaml b/services/logging/vault-serviceaccount.yaml new file mode 100644 index 0000000..9104c20 --- /dev/null +++ b/services/logging/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/logging/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: logging-vault-sync + namespace: logging diff --git a/services/logging/vault-sync-deployment.yaml b/services/logging/vault-sync-deployment.yaml new file mode 100644 index 0000000..41a4f7d --- /dev/null +++ b/services/logging/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/logging/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: logging-vault-sync + namespace: logging +spec: + replicas: 1 + selector: + matchLabels: + app: logging-vault-sync + template: + metadata: + labels: + app: logging-vault-sync + spec: + serviceAccountName: logging-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: logging-vault diff --git a/services/mailu/helmrelease.yaml 
b/services/mailu/helmrelease.yaml index e675961..7342141 100644 --- a/services/mailu/helmrelease.yaml +++ b/services/mailu/helmrelease.yaml @@ -305,3 +305,426 @@ spec: submission: port: 587 targetPort: 587 + postRenderers: + - kustomize: + patches: + - target: + kind: Deployment + name: mailu-admin + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-admin + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: admin + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: Deployment + name: mailu-front + patch: |- 
+ apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-front + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: front + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: Deployment + name: mailu-postfix + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-postfix + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + 
vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: postfix + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: Deployment + name: mailu-dovecot + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-dovecot + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export 
ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: dovecot + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: Deployment + name: mailu-rspamd + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-rspamd + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + 
export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: rspamd + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: Deployment + name: mailu-oletools + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-oletools + spec: + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-env.sh: "kv/data/atlas/mailu/mailu-secret" + vault.hashicorp.com/agent-inject-template-mailu-env.sh: | + {{ with secret "kv/data/atlas/mailu/mailu-secret" }} + export SECRET_KEY="{{ index .Data.data "secret-key" }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-db-secret" }} + export DB_PW="{{ .Data.data.password }}" + export ROUNDCUBE_DB_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }} + export INITIAL_ADMIN_PW="{{ .Data.data.password }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export RELAYUSER="{{ index .Data.data "apikey" }}" + export RELAYPASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + spec: + serviceAccountName: mailu-vault-sync + automountServiceAccountToken: true + containers: + - name: oletools + command: + - /entrypoint.sh + args: + - python3 + - /start.py + env: + - name: SECRET_KEY + $patch: delete + - name: 
INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/mailu-env.sh + volumeMounts: + - name: mailu-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: mailu-vault-entrypoint + configMap: + name: mailu-vault-entrypoint + defaultMode: 493 + - target: + kind: StatefulSet + name: mailu-clamav + patch: |- + apiVersion: apps/v1 + kind: StatefulSet + metadata: + name: mailu-clamav + spec: + template: + spec: + containers: + - name: clamav + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete + - target: + kind: Deployment + name: mailu-tika + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: mailu-tika + spec: + template: + spec: + containers: + - name: tika + env: + - name: SECRET_KEY + $patch: delete + - name: INITIAL_ADMIN_PW + $patch: delete + - name: DB_PW + $patch: delete + - name: RELAYUSER + $patch: delete + - name: RELAYPASSWORD + $patch: delete diff --git a/services/mailu/kustomization.yaml b/services/mailu/kustomization.yaml index af4b2b1..5c111eb 100644 --- a/services/mailu/kustomization.yaml +++ b/services/mailu/kustomization.yaml @@ -4,7 +4,10 @@ kind: Kustomization namespace: mailu-mailserver resources: - namespace.yaml + - serviceaccount.yaml + - secretproviderclass.yaml - helmrelease.yaml + - vault-sync-deployment.yaml - certificate.yaml - vip-controller.yaml - unbound-configmap.yaml @@ -16,6 +19,12 @@ resources: - front-lb.yaml configMapGenerator: + - name: mailu-vault-env + namespace: mailu-mailserver + files: + - mailu_vault_env.sh=scripts/mailu_vault_env.sh + options: + disableNameSuffixHash: true - name: mailu-sync-script namespace: mailu-mailserver files: @@ -26,3 +35,9 @@ configMapGenerator: namespace: 
mailu-mailserver files: - listener.py=scripts/mailu_sync_listener.py + - name: mailu-vault-entrypoint + namespace: mailu-mailserver + files: + - vault-entrypoint.sh=scripts/vault-entrypoint.sh + options: + disableNameSuffixHash: true diff --git a/services/mailu/mailu-sync-cronjob.yaml b/services/mailu/mailu-sync-cronjob.yaml index 268680f..1da1981 100644 --- a/services/mailu/mailu-sync-cronjob.yaml +++ b/services/mailu/mailu-sync-cronjob.yaml @@ -4,14 +4,40 @@ kind: CronJob metadata: name: mailu-sync-nightly namespace: mailu-mailserver + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "30 4 * * *" concurrencyPolicy: Forbid jobTemplate: spec: template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}} + 
vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret" + vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}} spec: restartPolicy: OnFailure + serviceAccountName: mailu-vault-sync containers: - name: mailu-sync image: python:3.11-alpine @@ -19,8 +45,10 @@ spec: command: ["/bin/sh", "-c"] args: - | + set -euo pipefail + . /vault/scripts/mailu_vault_env.sh pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \ - && python /app/sync.py + && python /app/sync.py env: - name: KEYCLOAK_BASE_URL value: http://keycloak.sso.svc.cluster.local @@ -30,39 +58,19 @@ spec: value: bstein.dev - name: MAILU_DEFAULT_QUOTA value: "20000000000" + - name: MAILU_SYSTEM_USERS + value: "no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev" - name: MAILU_DB_HOST value: postgres-service.postgres.svc.cluster.local - name: MAILU_DB_PORT value: "5432" - - name: MAILU_DB_NAME - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: database - - name: MAILU_DB_USER - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: username - - name: MAILU_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: password - - name: KEYCLOAK_CLIENT_ID - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-id - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-secret volumeMounts: - name: sync-script mountPath: /app/sync.py subPath: sync.py + - name: 
vault-scripts + mountPath: /vault/scripts + readOnly: true resources: requests: cpu: 50m @@ -75,3 +83,7 @@ spec: configMap: name: mailu-sync-script defaultMode: 0444 + - name: vault-scripts + configMap: + name: mailu-vault-env + defaultMode: 0555 diff --git a/services/mailu/mailu-sync-job.yaml b/services/mailu/mailu-sync-job.yaml index 7230c1d..8589e9e 100644 --- a/services/mailu/mailu-sync-job.yaml +++ b/services/mailu/mailu-sync-job.yaml @@ -2,12 +2,50 @@ apiVersion: batch/v1 kind: Job metadata: - name: mailu-sync + name: mailu-sync-9 namespace: mailu-mailserver spec: template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: 
"kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret" + vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}} spec: restartPolicy: OnFailure + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: mailu-vault-sync containers: - name: mailu-sync image: python:3.11-alpine @@ -15,8 +53,10 @@ spec: command: ["/bin/sh", "-c"] args: - | + set -euo pipefail + . 
/vault/scripts/mailu_vault_env.sh pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \ - && python /app/sync.py + && python /app/sync.py env: - name: KEYCLOAK_BASE_URL value: http://keycloak.sso.svc.cluster.local @@ -26,39 +66,19 @@ spec: value: bstein.dev - name: MAILU_DEFAULT_QUOTA value: "20000000000" + - name: MAILU_SYSTEM_USERS + value: no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev - name: MAILU_DB_HOST value: postgres-service.postgres.svc.cluster.local - name: MAILU_DB_PORT value: "5432" - - name: MAILU_DB_NAME - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: database - - name: MAILU_DB_USER - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: username - - name: MAILU_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: password - - name: KEYCLOAK_CLIENT_ID - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-id - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-secret volumeMounts: - name: sync-script mountPath: /app/sync.py subPath: sync.py + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true resources: requests: cpu: 50m @@ -71,3 +91,7 @@ spec: configMap: name: mailu-sync-script defaultMode: 0444 + - name: vault-scripts + configMap: + name: mailu-vault-env + defaultMode: 0555 diff --git a/services/mailu/mailu-sync-listener.yaml b/services/mailu/mailu-sync-listener.yaml index 2127313..cc98107 100644 --- a/services/mailu/mailu-sync-listener.yaml +++ b/services/mailu/mailu-sync-listener.yaml @@ -28,8 +28,31 @@ spec: metadata: labels: app: mailu-sync-listener + annotations: + vault.hashicorp.com/agent-inject: "true" + atlas.bstein.dev/mailu-sync-rev: "2" + vault.hashicorp.com/role: "mailu-mailserver" + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__database: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__database: | + {{- with secret 
"kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.database }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__username: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__username: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.username }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-db-secret__password: "kv/data/atlas/mailu/mailu-db-secret" + vault.hashicorp.com/agent-inject-template-mailu-db-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-db-secret" -}}{{ .Data.data.password }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-id: "kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-id: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-id" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-sync-credentials__client-secret: "kv/data/atlas/mailu/mailu-sync-credentials" + vault.hashicorp.com/agent-inject-template-mailu-sync-credentials__client-secret: | + {{- with secret "kv/data/atlas/mailu/mailu-sync-credentials" -}}{{ index .Data.data "client-secret" }}{{- end -}} + vault.hashicorp.com/agent-inject-secret-mailu-initial-account-secret__password: "kv/data/atlas/mailu/mailu-initial-account-secret" + vault.hashicorp.com/agent-inject-template-mailu-initial-account-secret__password: | + {{- with secret "kv/data/atlas/mailu/mailu-initial-account-secret" -}}{{ .Data.data.password }}{{- end -}} spec: restartPolicy: Always + serviceAccountName: mailu-vault-sync containers: - name: listener image: python:3.11-alpine @@ -37,8 +60,10 @@ spec: command: ["/bin/sh", "-c"] args: - | + set -euo pipefail + . 
/vault/scripts/mailu_vault_env.sh pip install --no-cache-dir requests psycopg2-binary passlib >/tmp/pip.log \ - && python /app/listener.py + && python /app/listener.py env: - name: KEYCLOAK_BASE_URL value: http://keycloak.sso.svc.cluster.local @@ -48,35 +73,12 @@ spec: value: bstein.dev - name: MAILU_DEFAULT_QUOTA value: "20000000000" + - name: MAILU_SYSTEM_USERS + value: no-reply-portal@bstein.dev,no-reply-vaultwarden@bstein.dev - name: MAILU_DB_HOST value: postgres-service.postgres.svc.cluster.local - name: MAILU_DB_PORT value: "5432" - - name: MAILU_DB_NAME - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: database - - name: MAILU_DB_USER - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: username - - name: MAILU_DB_PASSWORD - valueFrom: - secretKeyRef: - name: mailu-db-secret - key: password - - name: KEYCLOAK_CLIENT_ID - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-id - - name: KEYCLOAK_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: mailu-sync-credentials - key: client-secret volumeMounts: - name: sync-script mountPath: /app/sync.py @@ -84,6 +86,9 @@ spec: - name: listener-script mountPath: /app/listener.py subPath: listener.py + - name: vault-scripts + mountPath: /vault/scripts + readOnly: true resources: requests: cpu: 50m @@ -100,3 +105,7 @@ spec: configMap: name: mailu-sync-listener defaultMode: 0444 + - name: vault-scripts + configMap: + name: mailu-vault-env + defaultMode: 0555 diff --git a/services/mailu/scripts/mailu_sync.py b/services/mailu/scripts/mailu_sync.py index 74b170a..001917a 100644 --- a/services/mailu/scripts/mailu_sync.py +++ b/services/mailu/scripts/mailu_sync.py @@ -25,6 +25,14 @@ KC_CLIENT_SECRET = os.environ["KEYCLOAK_CLIENT_SECRET"] MAILU_DOMAIN = os.environ["MAILU_DOMAIN"] MAILU_DEFAULT_QUOTA = int(os.environ.get("MAILU_DEFAULT_QUOTA", "20000000000")) +MAILU_ENABLED_ATTR = os.environ.get("MAILU_ENABLED_ATTR", "mailu_enabled") +MAILU_EMAIL_ATTR = "mailu_email" +MAILU_SYSTEM_USERS = [ + 
item.strip() + for item in os.environ.get("MAILU_SYSTEM_USERS", "").split(",") + if item.strip() +] +MAILU_SYSTEM_PASSWORD = os.environ.get("MAILU_SYSTEM_PASSWORD", "").strip() DB_CONFIG = { "host": os.environ["MAILU_DB_HOST"], @@ -42,6 +50,28 @@ def log(msg): sys.stdout.flush() +def retry_request(label, func, attempts=10): + for attempt in range(1, attempts + 1): + try: + return func() + except requests.RequestException as exc: + if attempt == attempts: + raise + log(f"{label} failed (attempt {attempt}/{attempts}): {exc}") + time.sleep(attempt * 2) + + +def retry_db_connect(attempts=10): + for attempt in range(1, attempts + 1): + try: + return psycopg2.connect(**DB_CONFIG) + except psycopg2.Error as exc: + if attempt == attempts: + raise + log(f"Database connection failed (attempt {attempt}/{attempts}): {exc}") + time.sleep(attempt * 2) + + def get_kc_token(): resp = SESSION.post( f"{KC_BASE}/realms/{KC_REALM}/protocol/openid-connect/token", @@ -64,7 +94,12 @@ def kc_get_users(token): while True: resp = SESSION.get( f"{KC_BASE}/admin/realms/{KC_REALM}/users", - params={"first": first, "max": max_results, "enabled": "true"}, + params={ + "first": first, + "max": max_results, + "enabled": "true", + "briefRepresentation": "false", + }, headers=headers, timeout=20, ) @@ -82,17 +117,20 @@ def kc_update_attributes(token, user, attributes): "Authorization": f"Bearer {token}", "Content-Type": "application/json", } - payload = { - "firstName": user.get("firstName"), - "lastName": user.get("lastName"), - "email": user.get("email"), - "enabled": user.get("enabled", True), - "username": user["username"], - "emailVerified": user.get("emailVerified", False), - "attributes": attributes, - } user_url = f"{KC_BASE}/admin/realms/{KC_REALM}/users/{user['id']}" - resp = SESSION.put(user_url, headers=headers, json=payload, timeout=20) + current = SESSION.get( + user_url, + headers={"Authorization": f"Bearer {token}"}, + params={"briefRepresentation": "false"}, + timeout=15, + ) + 
current.raise_for_status() + current_payload = current.json() + current_attrs = current_payload.get("attributes") if isinstance(current_payload, dict) else None + if not isinstance(current_attrs, dict): + current_attrs = {} + current_attrs.update(attributes) + resp = SESSION.put(user_url, headers=headers, json={"attributes": current_attrs}, timeout=20) resp.raise_for_status() verify = SESSION.get( user_url, @@ -119,8 +157,15 @@ def get_attribute_value(attributes, key): return None +def mailu_enabled(attributes) -> bool: + raw = get_attribute_value(attributes, MAILU_ENABLED_ATTR) + if raw is None: + return bool(get_attribute_value(attributes, MAILU_EMAIL_ATTR)) + return str(raw).strip().lower() in {"1", "true", "yes", "y", "on"} + + def resolve_mailu_email(user, attributes): - explicit = get_attribute_value(attributes, "mailu_email") + explicit = get_attribute_value(attributes, MAILU_EMAIL_ATTR) if explicit: return explicit @@ -174,25 +219,48 @@ def ensure_mailu_user(cursor, email, password, display_name): ) +def ensure_system_mailboxes(cursor): + if not MAILU_SYSTEM_USERS: + return + if not MAILU_SYSTEM_PASSWORD: + log("MAILU_SYSTEM_USERS set but MAILU_SYSTEM_PASSWORD is missing; skipping system mailboxes") + return + + for email in MAILU_SYSTEM_USERS: + localpart = email.split("@", 1)[0] if "@" in email else email + try: + ensure_mailu_user(cursor, email, MAILU_SYSTEM_PASSWORD, localpart) + log(f"Ensured system mailbox for {email}") + except Exception as exc: + log(f"Failed to ensure system mailbox {email}: {exc}") + + def main(): - token = get_kc_token() - users = kc_get_users(token) - if not users: + token = retry_request("Keycloak token", get_kc_token) + users = retry_request("Keycloak user list", lambda: kc_get_users(token)) + if not users and not MAILU_SYSTEM_USERS: log("No users found; exiting.") return - conn = psycopg2.connect(**DB_CONFIG) + conn = retry_db_connect() conn.autocommit = True cursor = conn.cursor(cursor_factory=RealDictCursor) for user in 
users: attrs = user.get("attributes", {}) or {} + if user.get("enabled") is False: + continue + needs_update = False + if get_attribute_value(attrs, MAILU_ENABLED_ATTR) is None and get_attribute_value(attrs, MAILU_EMAIL_ATTR): + attrs[MAILU_ENABLED_ATTR] = ["true"] + needs_update = True + if not mailu_enabled(attrs): + continue app_pw = get_attribute_value(attrs, "mailu_app_password") mailu_email = resolve_mailu_email(user, attrs) - needs_update = False - if not get_attribute_value(attrs, "mailu_email"): - attrs["mailu_email"] = [mailu_email] + if not get_attribute_value(attrs, MAILU_EMAIL_ATTR): + attrs[MAILU_EMAIL_ATTR] = [mailu_email] needs_update = True if not app_pw: @@ -211,6 +279,8 @@ def main(): ensure_mailu_user(cursor, mailu_email, app_pw, display_name) log(f"Synced mailbox for {mailu_email}") + ensure_system_mailboxes(cursor) + cursor.close() conn.close() diff --git a/services/mailu/scripts/mailu_sync_listener.py b/services/mailu/scripts/mailu_sync_listener.py index 27070c0..6ac0da7 100644 --- a/services/mailu/scripts/mailu_sync_listener.py +++ b/services/mailu/scripts/mailu_sync_listener.py @@ -1,5 +1,6 @@ import http.server import json +import os import subprocess import threading @@ -7,15 +8,17 @@ from time import time # Simple debounce to avoid hammering on bursts MIN_INTERVAL_SECONDS = 10 +WAIT_TIMEOUT_SECONDS = float(os.environ.get("MAILU_SYNC_WAIT_TIMEOUT_SEC", "20")) last_run = 0.0 lock = threading.Lock() sync_done = threading.Event() sync_done.set() sync_running = False +last_rc = None def _run_sync_blocking() -> int: - global last_run, sync_running + global last_run, sync_running, last_rc with lock: if sync_running: return 0 @@ -27,6 +30,7 @@ def _run_sync_blocking() -> int: proc = subprocess.run(["python", "/app/sync.py"], check=False) rc = int(proc.returncode) print(f"mailu-sync-listener: sync completed rc={rc}", flush=True) + last_rc = rc return rc finally: with lock: @@ -66,16 +70,20 @@ class Handler(http.server.BaseHTTPRequestHandler): if 
wait: with lock: already_running = sync_running - if already_running: - sync_done.wait(timeout=120) - with lock: - still_running = sync_running - self.send_response(200 if not still_running else 503) - self.end_headers() - return - rc = _run_sync_blocking() - self.send_response(200 if rc == 0 else 500) + if not already_running: + _trigger_sync_async() + + sync_done.wait(timeout=WAIT_TIMEOUT_SECONDS) + with lock: + still_running = sync_running + rc = last_rc + + if still_running: + # Avoid blocking callers while a sync is in flight. + self.send_response(200) + else: + self.send_response(200 if rc == 0 else 500) self.end_headers() return diff --git a/services/mailu/scripts/mailu_vault_env.sh b/services/mailu/scripts/mailu_vault_env.sh new file mode 100644 index 0000000..fb8055b --- /dev/null +++ b/services/mailu/scripts/mailu_vault_env.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env sh +set -eu + +vault_dir="/vault/secrets" + +read_secret() { + tr -d '\r\n' < "${vault_dir}/$1" +} + +export MAILU_DB_NAME="$(read_secret mailu-db-secret__database)" +export MAILU_DB_USER="$(read_secret mailu-db-secret__username)" +export MAILU_DB_PASSWORD="$(read_secret mailu-db-secret__password)" +export KEYCLOAK_CLIENT_ID="$(read_secret mailu-sync-credentials__client-id)" +export KEYCLOAK_CLIENT_SECRET="$(read_secret mailu-sync-credentials__client-secret)" +export MAILU_SYSTEM_PASSWORD="$(read_secret mailu-initial-account-secret__password)" diff --git a/services/mailu/scripts/vault-entrypoint.sh b/services/mailu/scripts/vault-entrypoint.sh new file mode 100644 index 0000000..fa3b791 --- /dev/null +++ b/services/mailu/scripts/vault-entrypoint.sh @@ -0,0 +1,34 @@ +#!/bin/sh +set -eu + +if [ -n "${VAULT_ENV_FILE:-}" ]; then + if [ -f "${VAULT_ENV_FILE}" ]; then + # shellcheck disable=SC1090 + . 
"${VAULT_ENV_FILE}" + else + echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/mailu/secretproviderclass.yaml b/services/mailu/secretproviderclass.yaml new file mode 100644 index 0000000..f58c69b --- /dev/null +++ b/services/mailu/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/mailu/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: mailu-vault + namespace: mailu-mailserver +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "mailu-mailserver" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/mailu-mailserver" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/mailu/serviceaccount.yaml b/services/mailu/serviceaccount.yaml new file mode 100644 index 0000000..d95410b --- /dev/null +++ b/services/mailu/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/mailu/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: mailu-vault-sync + namespace: mailu-mailserver diff --git a/services/mailu/vault-sync-deployment.yaml b/services/mailu/vault-sync-deployment.yaml new file mode 100644 index 0000000..966f22b --- /dev/null +++ b/services/mailu/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/mailu/vault-sync-deployment.yaml 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: mailu-vault-sync + namespace: mailu-mailserver +spec: + replicas: 1 + selector: + matchLabels: + app: mailu-vault-sync + template: + metadata: + labels: + app: mailu-vault-sync + spec: + serviceAccountName: mailu-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: mailu-vault diff --git a/services/mailu/vip-controller.yaml b/services/mailu/vip-controller.yaml index a6d8c1f..faa49ec 100644 --- a/services/mailu/vip-controller.yaml +++ b/services/mailu/vip-controller.yaml @@ -5,6 +5,8 @@ kind: ServiceAccount metadata: name: vip-controller namespace: mailu-mailserver +imagePullSecrets: + - name: harbor-regcred --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -50,7 +52,7 @@ spec: mailu.bstein.dev/vip: "true" containers: - name: vip-controller - image: lachlanevenson/k8s-kubectl:latest + image: registry.bstein.dev/bstein/kubectl:1.35.0 imagePullPolicy: IfNotPresent command: - /bin/sh diff --git a/services/maintenance/disable-k3s-traefik-daemonset.yaml b/services/maintenance/disable-k3s-traefik-daemonset.yaml new file mode 100644 index 0000000..71f0ece --- /dev/null +++ b/services/maintenance/disable-k3s-traefik-daemonset.yaml @@ -0,0 +1,49 @@ +# services/maintenance/disable-k3s-traefik-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: disable-k3s-traefik + namespace: maintenance +spec: + selector: + matchLabels: + app: disable-k3s-traefik + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: disable-k3s-traefik + spec: + serviceAccountName: disable-k3s-traefik + nodeSelector: + node-role.kubernetes.io/control-plane: "true" + tolerations: + - key: 
node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: disable-k3s-traefik + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/disable_k3s_traefik.sh"] + securityContext: + privileged: true + runAsUser: 0 + volumeMounts: + - name: host-root + mountPath: /host + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: host-root + hostPath: + path: / + - name: script + configMap: + name: disable-k3s-traefik-script + defaultMode: 0555 diff --git a/services/maintenance/disable-k3s-traefik-serviceaccount.yaml b/services/maintenance/disable-k3s-traefik-serviceaccount.yaml new file mode 100644 index 0000000..37bf6dc --- /dev/null +++ b/services/maintenance/disable-k3s-traefik-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/maintenance/disable-k3s-traefik-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: disable-k3s-traefik + namespace: maintenance diff --git a/services/maintenance/image-sweeper-cronjob.yaml b/services/maintenance/image-sweeper-cronjob.yaml index 08127bc..c94fcca 100644 --- a/services/maintenance/image-sweeper-cronjob.yaml +++ b/services/maintenance/image-sweeper-cronjob.yaml @@ -17,6 +17,8 @@ spec: restartPolicy: OnFailure nodeSelector: kubernetes.io/os: linux + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" tolerations: - key: node-role.kubernetes.io/control-plane operator: Exists diff --git a/services/maintenance/k3s-agent-restart-daemonset.yaml b/services/maintenance/k3s-agent-restart-daemonset.yaml new file mode 100644 index 0000000..e91e348 --- /dev/null +++ b/services/maintenance/k3s-agent-restart-daemonset.yaml @@ -0,0 +1,49 @@ +# services/maintenance/k3s-agent-restart-daemonset.yaml +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: k3s-agent-restart + 
namespace: maintenance +spec: + selector: + matchLabels: + app: k3s-agent-restart + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: k3s-agent-restart + spec: + serviceAccountName: node-nofile + hostPID: true + hostNetwork: true + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + containers: + - name: restart + image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: + - /bin/sh + - -c + args: + - | + set -euo pipefail + if nsenter -t 1 -m -u -i -n -p -- /usr/bin/systemctl restart k3s-agent; then + echo "k3s-agent restarted" + else + nsenter -t 1 -m -u -i -n -p -- /bin/systemctl restart k3s-agent + echo "k3s-agent restarted via /bin/systemctl" + fi + sleep infinity + securityContext: + privileged: true + runAsUser: 0 diff --git a/services/maintenance/k3s-traefik-cleanup-job.yaml b/services/maintenance/k3s-traefik-cleanup-job.yaml new file mode 100644 index 0000000..d5d12a6 --- /dev/null +++ b/services/maintenance/k3s-traefik-cleanup-job.yaml @@ -0,0 +1,40 @@ +# services/maintenance/k3s-traefik-cleanup-job.yaml +apiVersion: batch/v1 +kind: Job +metadata: + name: k3s-traefik-cleanup-2 + namespace: maintenance +spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: k3s-traefik-cleanup + restartPolicy: Never + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + containers: + - name: cleanup + image: 
bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 + command: ["/usr/bin/env", "bash"] + args: ["/scripts/k3s_traefik_cleanup.sh"] + volumeMounts: + - name: script + mountPath: /scripts + readOnly: true + volumes: + - name: script + configMap: + name: k3s-traefik-cleanup-script + defaultMode: 0555 diff --git a/services/maintenance/k3s-traefik-cleanup-rbac.yaml b/services/maintenance/k3s-traefik-cleanup-rbac.yaml new file mode 100644 index 0000000..45710c5 --- /dev/null +++ b/services/maintenance/k3s-traefik-cleanup-rbac.yaml @@ -0,0 +1,41 @@ +# services/maintenance/k3s-traefik-cleanup-rbac.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: k3s-traefik-cleanup + namespace: maintenance + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: k3s-traefik-cleanup +rules: + - apiGroups: ["helm.cattle.io"] + resources: ["helmcharts", "helmchartconfigs"] + verbs: ["get", "list", "watch", "delete"] + - apiGroups: [""] + resources: ["services", "serviceaccounts"] + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "list", "watch", "delete"] + - apiGroups: ["rbac.authorization.k8s.io"] + resources: ["clusterroles", "clusterrolebindings"] + verbs: ["get", "list", "watch", "delete"] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: k3s-traefik-cleanup +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: k3s-traefik-cleanup +subjects: + - kind: ServiceAccount + name: k3s-traefik-cleanup + namespace: maintenance diff --git a/services/maintenance/kustomization.yaml b/services/maintenance/kustomization.yaml index ce34afb..e53ed3c 100644 --- a/services/maintenance/kustomization.yaml +++ b/services/maintenance/kustomization.yaml @@ -3,15 +3,32 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - namespace.yaml + - 
disable-k3s-traefik-serviceaccount.yaml + - k3s-traefik-cleanup-rbac.yaml - node-nofile-serviceaccount.yaml - pod-cleaner-rbac.yaml + - disable-k3s-traefik-daemonset.yaml + - k3s-traefik-cleanup-job.yaml - node-nofile-daemonset.yaml + - k3s-agent-restart-daemonset.yaml - pod-cleaner-cronjob.yaml - node-image-sweeper-serviceaccount.yaml - node-image-sweeper-daemonset.yaml - image-sweeper-cronjob.yaml configMapGenerator: + - name: disable-k3s-traefik-script + namespace: maintenance + files: + - disable_k3s_traefik.sh=scripts/disable_k3s_traefik.sh + options: + disableNameSuffixHash: true + - name: k3s-traefik-cleanup-script + namespace: maintenance + files: + - k3s_traefik_cleanup.sh=scripts/k3s_traefik_cleanup.sh + options: + disableNameSuffixHash: true - name: node-nofile-script namespace: maintenance files: diff --git a/services/maintenance/pod-cleaner-cronjob.yaml b/services/maintenance/pod-cleaner-cronjob.yaml index ffca7dd..e083c85 100644 --- a/services/maintenance/pod-cleaner-cronjob.yaml +++ b/services/maintenance/pod-cleaner-cronjob.yaml @@ -16,6 +16,9 @@ spec: spec: serviceAccountName: pod-cleaner restartPolicy: Never + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" containers: - name: cleaner image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 diff --git a/services/maintenance/scripts/disable_k3s_traefik.sh b/services/maintenance/scripts/disable_k3s_traefik.sh new file mode 100644 index 0000000..7b8cebd --- /dev/null +++ b/services/maintenance/scripts/disable_k3s_traefik.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +set -euo pipefail + +host_root="/host" +env_file="${host_root}/etc/systemd/system/k3s.service.env" +manifest_dir="${host_root}/var/lib/rancher/k3s/server/manifests" + +changed=0 + +ensure_disable_flag() { + mkdir -p "$(dirname "${env_file}")" + if [ ! 
-f "${env_file}" ]; then + printf 'K3S_DISABLE=traefik\n' > "${env_file}" + changed=1 + return + fi + + if grep -q '^K3S_DISABLE=' "${env_file}"; then + current="$(grep '^K3S_DISABLE=' "${env_file}" | tail -n1 | cut -d= -f2-)" + current="$(printf '%s' "${current}" | sed 's/^\"//;s/\"$//' | tr -d ' ')" + if ! printf '%s' "${current}" | grep -qw "traefik"; then + if [ -z "${current}" ]; then + updated="traefik" + else + updated="${current},traefik" + fi + sed -i "s/^K3S_DISABLE=.*/K3S_DISABLE=${updated}/" "${env_file}" + changed=1 + fi + else + printf '\nK3S_DISABLE=traefik\n' >> "${env_file}" + changed=1 + fi +} + +remove_manifest() { + if [ -d "${manifest_dir}" ] && ls "${manifest_dir}"/traefik* >/dev/null 2>&1; then + rm -f "${manifest_dir}"/traefik*.yaml "${manifest_dir}"/traefik*.yml + changed=1 + fi +} + +restart_k3s() { + node_name="$(cat "${host_root}/etc/hostname" 2>/dev/null || hostname)" + delay=0 + case "${node_name}" in + *0b) delay=60 ;; + *0c) delay=120 ;; + esac + if [ "${delay}" -gt 0 ]; then + sleep "${delay}" + fi + chroot "${host_root}" /bin/systemctl daemon-reload || true + chroot "${host_root}" /bin/systemctl restart k3s +} + +ensure_disable_flag +remove_manifest + +if [ "${changed}" -eq 1 ]; then + restart_k3s +fi + +sleep infinity diff --git a/services/maintenance/scripts/k3s_traefik_cleanup.sh b/services/maintenance/scripts/k3s_traefik_cleanup.sh new file mode 100755 index 0000000..81ba337 --- /dev/null +++ b/services/maintenance/scripts/k3s_traefik_cleanup.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail + +kubectl -n kube-system delete helmchart traefik traefik-crd --ignore-not-found --wait=false +kubectl -n kube-system delete deployment traefik --ignore-not-found --wait=false +kubectl -n kube-system delete service traefik --ignore-not-found --wait=false +kubectl -n kube-system delete serviceaccount traefik helm-traefik helm-traefik-crd --ignore-not-found --wait=false + +kubectl delete clusterrole traefik-ingress-controller 
traefik-kube-system --ignore-not-found --wait=false +kubectl delete clusterrolebinding helm-kube-system-traefik helm-kube-system-traefik-crd traefik-ingress-controller traefik-kube-system --ignore-not-found --wait=false diff --git a/services/monitoring/dashboards/atlas-gpu.json b/services/monitoring/dashboards/atlas-gpu.json index fb1b216..af8a1c5 100644 --- a/services/monitoring/dashboards/atlas-gpu.json +++ b/services/monitoring/dashboards/atlas-gpu.json @@ -57,7 +57,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -67,11 +67,11 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." 
}, { "id": 2, @@ -207,16 +207,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -226,7 +226,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -241,16 +241,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : 
namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -260,7 +260,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -275,16 +275,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces 
only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -294,7 +294,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-nodes.json b/services/monitoring/dashboards/atlas-nodes.json index 0bfd639..2d60042 100644 --- a/services/monitoring/dashboards/atlas-nodes.json +++ b/services/monitoring/dashboards/atlas-nodes.json @@ -142,7 +142,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", + "expr": 
"sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-overview.json b/services/monitoring/dashboards/atlas-overview.json index a113d22..c5f30d1 100644 --- a/services/monitoring/dashboards/atlas-overview.json +++ b/services/monitoring/dashboards/atlas-overview.json @@ -76,7 +76,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)", "refId": "A" } ], @@ -796,7 +796,7 @@ }, "gridPos": { "h": 2, - "w": 6, + "w": 5, "x": 0, "y": 8 }, @@ -863,8 +863,8 @@ }, "gridPos": { "h": 2, - "w": 6, - "x": 12, + "w": 5, + "x": 10, "y": 8 }, "targets": [ @@ -968,8 +968,8 @@ }, "gridPos": { "h": 2, - "w": 6, - "x": 6, + "w": 5, + "x": 5, "y": 8 }, "targets": [ @@ -1044,8 +1044,8 @@ }, "gridPos": { "h": 2, - "w": 6, - "x": 18, + "w": 5, + "x": 15, "y": 8 }, "targets": [ @@ -1447,7 +1447,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1457,11 +1457,11 @@ }, { "title": 
"Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." }, { "id": 12, @@ -1516,7 +1516,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1526,11 +1526,11 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." }, { "id": 13, @@ -1585,7 +1585,7 @@ "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1595,11 +1595,11 @@ }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. 
Switching scope changes the denominator." }, { "id": 14, @@ -2174,16 +2174,16 @@ "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2193,7 +2193,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -2208,16 +2208,16 @@ "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload 
namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2227,7 +2227,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -2242,16 +2242,16 @@ "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : 
namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2261,7 +2261,7 @@ }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/dashboards/atlas-pods.json b/services/monitoring/dashboards/atlas-pods.json index ff2dbdd..adab84b 100644 --- a/services/monitoring/dashboards/atlas-pods.json +++ b/services/monitoring/dashboards/atlas-pods.json @@ -200,7 +200,7 @@ }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", + 
"expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-storage.json b/services/monitoring/dashboards/atlas-storage.json index d93a941..0eca11c 100644 --- a/services/monitoring/dashboards/atlas-storage.json +++ b/services/monitoring/dashboards/atlas-storage.json @@ -494,7 +494,7 @@ }, "targets": [ { - "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=\"image-sweeper\"})", "refId": "A" } ], diff --git a/services/monitoring/dashboards/atlas-testing.json b/services/monitoring/dashboards/atlas-testing.json new file mode 100644 index 0000000..25cf3f8 --- /dev/null +++ b/services/monitoring/dashboards/atlas-testing.json @@ -0,0 +1,339 @@ +{ + "uid": "atlas-testing", + "title": "Atlas Testing", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Glue Jobs Stale (>36h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))", + "refId": "A" + } 
+ ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 2, + "type": "table", + "title": "Glue Jobs Missing Success", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "targets": [ + { + "expr": "((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 3, + "type": "table", + "title": "Glue Jobs Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "(kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": 
{ + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 4, + "type": "table", + "title": "Glue Jobs Active Runs", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "(kube_cronjob_status_active and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 5, + "type": "table", + "title": "Glue Jobs Last Success (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 6, + "type": "table", + "title": "Glue Jobs Last Schedule (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": 
{ + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "targets": [ + { + "expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + } + ], + "time": { + "from": "now-7d", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "testing" + ] +} diff --git a/services/monitoring/dcgm-exporter.yaml b/services/monitoring/dcgm-exporter.yaml index 7627420..8760c9f 100644 --- a/services/monitoring/dcgm-exporter.yaml +++ b/services/monitoring/dcgm-exporter.yaml @@ -22,6 +22,8 @@ spec: prometheus.io/port: "9400" spec: serviceAccountName: default + imagePullSecrets: + - name: harbor-regcred runtimeClassName: nvidia affinity: nodeAffinity: diff --git a/services/monitoring/grafana-alerting-config.yaml b/services/monitoring/grafana-alerting-config.yaml index c679bff..daa1e29 100644 --- a/services/monitoring/grafana-alerting-config.yaml +++ b/services/monitoring/grafana-alerting-config.yaml @@ -244,7 +244,7 @@ data: to: 0 datasourceUid: atlas-vm model: - expr: time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"}) + expr: time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"}) intervalMs: 60000 maxDataPoints: 43200 legendFormat: '{{cronjob}}' diff --git a/services/monitoring/grafana-dashboard-gpu.yaml b/services/monitoring/grafana-dashboard-gpu.yaml index 49b5d39..d7950f2 100644 --- 
a/services/monitoring/grafana-dashboard-gpu.yaml +++ b/services/monitoring/grafana-dashboard-gpu.yaml @@ -66,7 +66,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -76,11 +76,11 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." 
}, { "id": 2, @@ -216,16 +216,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -235,7 +235,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -250,16 +250,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": "workload namespaces only : 
namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -269,7 +269,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -284,16 +284,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure 
namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -303,7 +303,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-nodes.yaml b/services/monitoring/grafana-dashboard-nodes.yaml index 5e02c18..f0f1982 100644 --- a/services/monitoring/grafana-dashboard-nodes.yaml +++ b/services/monitoring/grafana-dashboard-nodes.yaml @@ -151,7 +151,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", + "expr": 
"sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-overview.yaml b/services/monitoring/grafana-dashboard-overview.yaml index e627658..8ad7523 100644 --- a/services/monitoring/grafana-dashboard-overview.yaml +++ b/services/monitoring/grafana-dashboard-overview.yaml @@ -85,7 +85,7 @@ data: }, "targets": [ { - "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"}) or on() vector(0)", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"}) or on() vector(0)", "refId": "A" } ], @@ -805,7 +805,7 @@ data: }, "gridPos": { "h": 2, - "w": 6, + "w": 5, "x": 0, "y": 8 }, @@ -872,8 +872,8 @@ data: }, "gridPos": { "h": 2, - "w": 6, - "x": 12, + "w": 5, + "x": 10, "y": 8 }, "targets": [ @@ -977,8 +977,8 @@ data: }, "gridPos": { "h": 2, - "w": 6, - "x": 6, + "w": 5, + "x": 5, "y": 8 }, "targets": [ @@ -1053,8 +1053,8 @@ data: }, "gridPos": { "h": 2, - "w": 6, - "x": 18, + "w": 5, + "x": 15, "y": 8 }, "targets": [ @@ -1456,7 +1456,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, 
{ @@ -1466,11 +1466,11 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." }, { "id": 12, @@ -1525,7 +1525,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false }, { @@ -1535,11 +1535,11 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", + "url": 
"?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22&var-namespace_scope_ram=${namespace_scope_ram}", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. Switching scope changes the denominator." }, { "id": 13, @@ -1594,7 +1594,7 @@ data: "links": [ { "title": "Workload namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%21~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false }, { @@ -1604,11 +1604,11 @@ data: }, { "title": "Infrastructure namespaces only", - "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-system%7Clonghorn-system%7Cmetallb-system%7Cmonitoring%7Clogging%7Ccert-manager%7Cflux-system%7Ctraefik%7Cmaintenance%7Cpostgres%29%24%22", + "url": "?var-namespace_scope_cpu=${namespace_scope_cpu}&var-namespace_scope_gpu=${namespace_scope_gpu}&var-namespace_scope_ram=namespace%3D~%22%5E%28kube-.%2A%7C.%2A-system%7Ctraefik%7Cmonitoring%7Clogging%7Ccert-manager%7Cmaintenance%7Cpostgres%29%24%22", "targetBlank": false } ], - "description": "Values are normalized within the selected scope; use panel links to switch scope." + "description": "Shares are normalized within the selected filter. 
Switching scope changes the denominator." }, { "id": 14, @@ -2183,16 +2183,16 @@ data: "name": "namespace_scope_cpu", "label": "CPU namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2202,7 +2202,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -2217,16 +2217,16 @@ data: "name": "namespace_scope_gpu", "label": "GPU namespace filter", "type": "custom", - "query": 
"workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2236,7 +2236,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], @@ -2251,16 +2251,16 @@ data: "name": "namespace_scope_ram", "label": "RAM namespace filter", "type": "custom", - "query": "workload namespaces only : namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\",all namespaces : 
namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "query": "workload namespaces only : namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\",all namespaces : namespace=~\".*\",infrastructure namespaces only : namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "current": { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, "options": [ { "text": "workload namespaces only", - "value": "namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": true }, { @@ -2270,7 +2270,7 @@ data: }, { "text": "infrastructure namespaces only", - "value": "namespace=~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"", + "value": "namespace=~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"", "selected": false } ], diff --git a/services/monitoring/grafana-dashboard-pods.yaml b/services/monitoring/grafana-dashboard-pods.yaml index 5ea8343..f537d4c 100644 --- a/services/monitoring/grafana-dashboard-pods.yaml +++ b/services/monitoring/grafana-dashboard-pods.yaml @@ -209,7 +209,7 @@ data: }, "targets": [ { - "expr": 
"sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-system|longhorn-system|metallb-system|monitoring|logging|cert-manager|flux-system|traefik|maintenance|postgres)$\"})", + "expr": "sum(kube_pod_info{node=~\"titan-0a|titan-0b|titan-0c\",namespace!~\"^(kube-.*|.*-system|traefik|monitoring|logging|cert-manager|maintenance|postgres)$\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-storage.yaml b/services/monitoring/grafana-dashboard-storage.yaml index 5ce4186..d25e922 100644 --- a/services/monitoring/grafana-dashboard-storage.yaml +++ b/services/monitoring/grafana-dashboard-storage.yaml @@ -503,7 +503,7 @@ data: }, "targets": [ { - "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=~\"image-sweeper|grafana-smtp-sync\"})", + "expr": "time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace=\"maintenance\",cronjob=\"image-sweeper\"})", "refId": "A" } ], diff --git a/services/monitoring/grafana-dashboard-testing.yaml b/services/monitoring/grafana-dashboard-testing.yaml new file mode 100644 index 0000000..80a7043 --- /dev/null +++ b/services/monitoring/grafana-dashboard-testing.yaml @@ -0,0 +1,348 @@ +# services/monitoring/grafana-dashboard-testing.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: grafana-dashboard-testing + labels: + grafana_dashboard: "1" +data: + atlas-testing.json: | + { + "uid": "atlas-testing", + "title": "Atlas Testing", + "folderUid": "atlas-internal", + "editable": true, + "panels": [ + { + "id": 1, + "type": "stat", + "title": "Glue Jobs Stale (>36h)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "targets": [ + { + "expr": "(sum((((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})) > bool 129600) unless on(namespace,cronjob) 
(kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)) + count(((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)))", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 3 + } + ] + }, + "unit": "none", + "custom": { + "displayMode": "auto" + } + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + } + }, + { + "id": 2, + "type": "table", + "title": "Glue Jobs Missing Success", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "targets": [ + { + "expr": "((kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time) unless on(namespace,cronjob) (kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1)", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 3, + 
"type": "table", + "title": "Glue Jobs Suspended", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "targets": [ + { + "expr": "(kube_cronjob_spec_suspend and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}) == 1", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 4, + "type": "table", + "title": "Glue Jobs Active Runs", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "targets": [ + { + "expr": "(kube_cronjob_status_active and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"})", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "none", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 5, + "type": "table", + "title": "Glue Jobs Last Success (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "targets": [ + { + "expr": "((time() - (kube_cronjob_status_last_successful_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } 
+ }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + }, + { + "id": 6, + "type": "table", + "title": "Glue Jobs Last Schedule (hours ago)", + "datasource": { + "type": "prometheus", + "uid": "atlas-vm" + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "targets": [ + { + "expr": "((time() - (kube_cronjob_status_last_schedule_time and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue=\"true\"}))) / 3600", + "refId": "A", + "instant": true + } + ], + "fieldConfig": { + "defaults": { + "unit": "h", + "custom": { + "filterable": true + } + }, + "overrides": [] + }, + "options": { + "showHeader": true, + "columnFilters": false + }, + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "sortBy", + "options": { + "fields": [ + "Value" + ], + "order": "desc" + } + } + ] + } + ], + "time": { + "from": "now-7d", + "to": "now" + }, + "annotations": { + "list": [] + }, + "schemaVersion": 39, + "style": "dark", + "tags": [ + "atlas", + "testing" + ] + } diff --git a/services/monitoring/grafana-org-bootstrap.yaml b/services/monitoring/grafana-org-bootstrap.yaml index 0872f4a..f1d4075 100644 --- a/services/monitoring/grafana-org-bootstrap.yaml +++ b/services/monitoring/grafana-org-bootstrap.yaml @@ -2,13 +2,39 @@ apiVersion: batch/v1 kind: Job metadata: - name: grafana-org-bootstrap-1 + name: grafana-org-bootstrap-3 namespace: monitoring spec: backoffLimit: 2 template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "monitoring" + vault.hashicorp.com/agent-inject-secret-grafana-env: "kv/data/atlas/monitoring/grafana-admin" + vault.hashicorp.com/agent-inject-template-grafana-env: | + {{- with secret 
"kv/data/atlas/monitoring/grafana-admin" -}} + export GRAFANA_USER="{{ index .Data.data "admin-user" }}" + export GRAFANA_PASSWORD="{{ index .Data.data "admin-password" }}" + {{- end -}} spec: restartPolicy: OnFailure + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/worker + operator: Exists + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["arm64"] + serviceAccountName: monitoring-vault-sync containers: - name: bootstrap image: python:3.11-alpine @@ -17,20 +43,11 @@ spec: value: http://grafana - name: OVERVIEW_ORG_NAME value: Overview - - name: GRAFANA_USER - valueFrom: - secretKeyRef: - name: grafana-admin - key: admin-user - - name: GRAFANA_PASSWORD - valueFrom: - secretKeyRef: - name: grafana-admin - key: admin-password command: ["/bin/sh", "-c"] args: - | set -euo pipefail + . /vault/secrets/grafana-env python - <<'PY' import base64 import json diff --git a/services/monitoring/grafana-smtp-sync-cronjob.yaml b/services/monitoring/grafana-smtp-sync-cronjob.yaml deleted file mode 100644 index 3b92d4c..0000000 --- a/services/monitoring/grafana-smtp-sync-cronjob.yaml +++ /dev/null @@ -1,44 +0,0 @@ -# services/monitoring/grafana-smtp-sync-cronjob.yaml -apiVersion: batch/v1 -kind: CronJob -metadata: - name: grafana-smtp-sync - namespace: monitoring -spec: - schedule: "15 3 * * *" - concurrencyPolicy: Forbid - jobTemplate: - spec: - template: - spec: - serviceAccountName: grafana-smtp-sync - restartPolicy: OnFailure - containers: - - name: sync - image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131 - command: ["/bin/sh", "-c"] - args: - - | - set -euo pipefail - if ! 
command -v jq >/dev/null 2>&1; then - apt-get update >/dev/null && apt-get install -y jq >/dev/null - fi - exec /scripts/sync.sh - env: - - name: SOURCE_NS - value: mailu-mailserver - - name: SOURCE_SECRET - value: mailu-postmark-relay - - name: TARGET_NS - value: monitoring - - name: TARGET_SECRET - value: grafana-smtp - volumeMounts: - - name: script - mountPath: /scripts - readOnly: true - volumes: - - name: script - configMap: - name: grafana-smtp-sync-script - defaultMode: 0555 diff --git a/services/monitoring/grafana-smtp-sync-rbac.yaml b/services/monitoring/grafana-smtp-sync-rbac.yaml deleted file mode 100644 index 532d622..0000000 --- a/services/monitoring/grafana-smtp-sync-rbac.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# services/monitoring/grafana-smtp-sync-rbac.yaml -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: grafana-smtp-sync -rules: - - apiGroups: [""] - resources: ["secrets"] - verbs: ["get"] - resourceNames: - - mailu-postmark-relay ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: grafana-smtp-sync -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: grafana-smtp-sync -subjects: - - kind: ServiceAccount - name: grafana-smtp-sync - namespace: monitoring - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: grafana-smtp-sync - namespace: monitoring -rules: - - apiGroups: [""] - resources: ["secrets"] - verbs: ["get", "create", "update", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: grafana-smtp-sync - namespace: monitoring -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: grafana-smtp-sync -subjects: - - kind: ServiceAccount - name: grafana-smtp-sync - namespace: monitoring diff --git a/services/monitoring/grafana-smtp-sync-serviceaccount.yaml b/services/monitoring/grafana-smtp-sync-serviceaccount.yaml deleted file mode 100644 index 6ad0e18..0000000 --- 
a/services/monitoring/grafana-smtp-sync-serviceaccount.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# services/monitoring/grafana-smtp-sync-serviceaccount.yaml -apiVersion: v1 -kind: ServiceAccount -metadata: - name: grafana-smtp-sync - namespace: monitoring diff --git a/services/monitoring/helmrelease.yaml b/services/monitoring/helmrelease.yaml index 704b91d..304de05 100644 --- a/services/monitoring/helmrelease.yaml +++ b/services/monitoring/helmrelease.yaml @@ -1,4 +1,4 @@ -# services/monitoring/kube-state-metrics-helmrelease.yaml +# services/monitoring/helmrelease.yaml apiVersion: helm.toolkit.fluxcd.io/v2 kind: HelmRelease metadata: @@ -15,7 +15,18 @@ spec: name: prometheus namespace: flux-system values: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - titan-22 prometheusScrape: false + metricLabelsAllowlist: + - cronjobs=[atlas.bstein.dev/glue] --- @@ -71,7 +82,16 @@ spec: persistentVolume: enabled: true - size: 250Gi + size: 100Gi + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - titan-22 # Enable built-in Kubernetes scraping scrape: @@ -245,15 +265,69 @@ spec: kind: HelmRepository name: grafana namespace: flux-system + install: + remediation: { retries: 3 } + timeout: 15m + upgrade: + remediation: + retries: 3 + remediateLastFailure: true + cleanupOnFail: true + timeout: 15m values: admin: existingSecret: grafana-admin userKey: admin-user passwordKey: admin-password + serviceAccount: + create: false + name: monitoring-vault-sync + automountServiceAccountToken: true + podAnnotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "monitoring" + vault.hashicorp.com/agent-inject-secret-grafana-env.sh: "kv/data/atlas/monitoring/grafana-admin" + 
vault.hashicorp.com/agent-inject-template-grafana-env.sh: | + {{ with secret "kv/data/atlas/monitoring/grafana-admin" }} + export GF_SECURITY_ADMIN_USER="{{ index .Data.data "admin-user" }}" + export GF_SECURITY_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export GF_SMTP_USER="{{ index .Data.data "apikey" }}" + export GF_SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} persistence: enabled: true size: 20Gi storageClassName: astreae + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - rpi4 + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi5 + - weight: 70 + preference: + matchExpressions: + - key: hardware + operator: In + values: + - rpi4 deploymentStrategy: type: Recreate service: @@ -265,9 +339,9 @@ spec: GF_AUTH_ANONYMOUS_ORG_NAME: "Overview" GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer" GF_SMTP_ENABLED: "true" - GF_SMTP_HOST: "smtp.postmarkapp.com:587" - GF_SMTP_FROM: "alerts@bstein.dev" - GF_SMTP_FROM_NAME: "Atlas Alerts" + GF_SMTP_HOST: "mail.bstein.dev:587" + GF_SMTP_FROM: "no-reply-grafana@bstein.dev" + GF_SMTP_FROM_NAME: "Atlas Grafana" GRAFANA_ALERT_EMAILS: "alerts@bstein.dev" GF_SECURITY_ALLOW_EMBEDDING: "true" GF_AUTH_GENERIC_OAUTH_ENABLED: "true" @@ -291,20 +365,13 @@ spec: hide_version: true users: default_theme: dark - envValueFrom: - GF_SMTP_USER: - secretKeyRef: - name: grafana-smtp - key: username - GF_SMTP_PASSWORD: - secretKeyRef: - name: grafana-smtp - key: password ingress: enabled: true ingressClassName: traefik annotations: cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + 
traefik.ingress.kubernetes.io/router.tls: "true" hosts: - metrics.bstein.dev path: / @@ -402,6 +469,14 @@ spec: editable: true options: path: /var/lib/grafana/dashboards/mail + - name: testing + orgId: 1 + folder: Atlas Internal + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards/testing dashboardsConfigMaps: overview: grafana-dashboard-overview overview-public: grafana-dashboard-overview @@ -411,6 +486,7 @@ spec: gpu: grafana-dashboard-gpu network: grafana-dashboard-network mail: grafana-dashboard-mail + testing: grafana-dashboard-testing extraConfigmapMounts: - name: grafana-folders mountPath: /etc/grafana/provisioning/folders @@ -420,6 +496,48 @@ spec: mountPath: /etc/grafana/provisioning/alerting configMap: grafana-alerting readOnly: true + postRenderers: + - kustomize: + patches: + - target: + kind: Deployment + name: grafana + patch: |- + apiVersion: apps/v1 + kind: Deployment + metadata: + name: grafana + spec: + template: + spec: + serviceAccountName: monitoring-vault-sync + automountServiceAccountToken: true + containers: + - name: grafana + command: + - /entrypoint.sh + args: + - /run.sh + env: + - name: GF_SECURITY_ADMIN_USER + $patch: delete + - name: GF_SECURITY_ADMIN_PASSWORD + $patch: delete + - name: GF_SMTP_USER + $patch: delete + - name: GF_SMTP_PASSWORD + $patch: delete + - name: VAULT_ENV_FILE + value: /vault/secrets/grafana-env.sh + volumeMounts: + - name: monitoring-vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + volumes: + - name: monitoring-vault-entrypoint + configMap: + name: monitoring-vault-entrypoint + defaultMode: 493 --- @@ -439,11 +557,22 @@ spec: name: prometheus namespace: flux-system values: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - titan-22 ingress: enabled: true ingressClassName: traefik annotations: 
cert-manager.io/cluster-issuer: letsencrypt + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" hosts: - host: alerts.bstein.dev paths: diff --git a/services/monitoring/kustomization.yaml b/services/monitoring/kustomization.yaml index 0dafba7..7d0b01b 100644 --- a/services/monitoring/kustomization.yaml +++ b/services/monitoring/kustomization.yaml @@ -5,6 +5,8 @@ namespace: monitoring resources: - namespace.yaml - rbac.yaml + - secretproviderclass.yaml + - vault-serviceaccount.yaml - grafana-dashboard-overview.yaml - grafana-dashboard-pods.yaml - grafana-dashboard-nodes.yaml @@ -12,14 +14,13 @@ resources: - grafana-dashboard-network.yaml - grafana-dashboard-gpu.yaml - grafana-dashboard-mail.yaml + - grafana-dashboard-testing.yaml - dcgm-exporter.yaml - jetson-tegrastats-exporter.yaml - postmark-exporter-service.yaml - postmark-exporter-deployment.yaml + - vault-sync-deployment.yaml - grafana-alerting-config.yaml - - grafana-smtp-sync-serviceaccount.yaml - - grafana-smtp-sync-rbac.yaml - - grafana-smtp-sync-cronjob.yaml - grafana-folders.yaml - helmrelease.yaml - grafana-org-bootstrap.yaml @@ -31,15 +32,15 @@ configMapGenerator: - monitoring_postmark_exporter.py=scripts/postmark_exporter.py options: disableNameSuffixHash: true - - name: grafana-smtp-sync-script - namespace: monitoring - files: - - sync.sh=scripts/grafana_smtp_sync.sh - options: - disableNameSuffixHash: true - name: jetson-tegrastats-exporter-script namespace: monitoring files: - exporter.py=scripts/jetson_tegrastats_exporter.py options: disableNameSuffixHash: true + - name: monitoring-vault-entrypoint + namespace: monitoring + files: + - scripts/vault-entrypoint.sh + options: + disableNameSuffixHash: true diff --git a/services/monitoring/namespace.yaml b/services/monitoring/namespace.yaml index 3335b6a..37732a0 100644 --- a/services/monitoring/namespace.yaml +++ b/services/monitoring/namespace.yaml @@ -1,4 +1,5 @@ +# 
services/monitoring/namespace.yaml apiVersion: v1 kind: Namespace metadata: - name: monitoring \ No newline at end of file + name: monitoring diff --git a/services/monitoring/postmark-exporter-deployment.yaml b/services/monitoring/postmark-exporter-deployment.yaml index 646c455..6406224 100644 --- a/services/monitoring/postmark-exporter-deployment.yaml +++ b/services/monitoring/postmark-exporter-deployment.yaml @@ -16,8 +16,29 @@ spec: prometheus.io/scrape: "true" prometheus.io/port: "8000" prometheus.io/path: "/metrics" + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "monitoring" + vault.hashicorp.com/agent-inject-secret-postmark-env: "kv/data/atlas/monitoring/postmark-exporter" + vault.hashicorp.com/agent-inject-template-postmark-env: | + {{- with secret "kv/data/atlas/monitoring/postmark-exporter" -}} + export POSTMARK_SERVER_TOKEN="{{ index .Data.data "apikey" }}" + export POSTMARK_SERVER_TOKEN_FALLBACK="{{ index .Data.data "apikey" }}" + {{- if index .Data.data "sending-limit" }} + export POSTMARK_SENDING_LIMIT="{{ index .Data.data "sending-limit" }}" + {{- end }} + {{- end -}} bstein.dev/restarted-at: "2026-01-06T00:00:00Z" spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - titan-22 + serviceAccountName: monitoring-vault-sync containers: - name: exporter image: python:3.12-alpine @@ -26,25 +47,10 @@ spec: args: - | set -euo pipefail + . 
/vault/secrets/postmark-env pip install --no-cache-dir prometheus-client==0.22.1 requests==2.32.3 exec python /app/monitoring_postmark_exporter.py env: - - name: POSTMARK_SERVER_TOKEN - valueFrom: - secretKeyRef: - name: postmark-exporter - key: server-token - - name: POSTMARK_SERVER_TOKEN_FALLBACK - valueFrom: - secretKeyRef: - name: postmark-exporter - key: server-token-fallback - - name: POSTMARK_SENDING_LIMIT - valueFrom: - secretKeyRef: - name: postmark-exporter - key: sending-limit - optional: true - name: POSTMARK_SENDING_LIMIT_WINDOW value: "30d" - name: POLL_INTERVAL_SECONDS diff --git a/services/monitoring/scripts/grafana_smtp_sync.sh b/services/monitoring/scripts/grafana_smtp_sync.sh deleted file mode 100644 index c8207ad..0000000 --- a/services/monitoring/scripts/grafana_smtp_sync.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/sh -set -euo pipefail - -SOURCE_NS=${SOURCE_NS:-mailu-mailserver} -SOURCE_SECRET=${SOURCE_SECRET:-mailu-postmark-relay} -TARGET_NS=${TARGET_NS:-monitoring} -TARGET_SECRET=${TARGET_SECRET:-grafana-smtp} - -tmp=$(mktemp) -cleanup() { rm -f "$tmp"; } -trap cleanup EXIT - -kubectl -n "$SOURCE_NS" get secret "$SOURCE_SECRET" -o json > "$tmp" - -pass=$(jq -r '.data["relay-password"]' "$tmp") -user=$pass - -if [ -z "$user" ] || [ -z "$pass" ] || [ "$user" = "null" ] || [ "$pass" = "null" ]; then - echo "missing credentials from $SOURCE_NS/$SOURCE_SECRET" >&2 - exit 1 -fi - -cat <&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! 
-f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/monitoring/secretproviderclass.yaml b/services/monitoring/secretproviderclass.yaml new file mode 100644 index 0000000..8a6c5fb --- /dev/null +++ b/services/monitoring/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/monitoring/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: monitoring-vault + namespace: monitoring +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "monitoring" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/monitoring" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/monitoring/vault-serviceaccount.yaml b/services/monitoring/vault-serviceaccount.yaml new file mode 100644 index 0000000..fa23093 --- /dev/null +++ b/services/monitoring/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/monitoring/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: monitoring-vault-sync + namespace: monitoring diff --git a/services/monitoring/vault-sync-deployment.yaml b/services/monitoring/vault-sync-deployment.yaml new file mode 100644 index 0000000..d335330 --- /dev/null +++ b/services/monitoring/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/monitoring/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: monitoring-vault-sync + namespace: monitoring +spec: + replicas: 1 + selector: + matchLabels: + app: monitoring-vault-sync + template: + metadata: + labels: + app: monitoring-vault-sync + spec: + serviceAccountName: monitoring-vault-sync + 
containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: monitoring-vault diff --git a/services/nextcloud-mail-sync/cronjob.yaml b/services/nextcloud-mail-sync/cronjob.yaml index 9976d8e..2073d76 100644 --- a/services/nextcloud-mail-sync/cronjob.yaml +++ b/services/nextcloud-mail-sync/cronjob.yaml @@ -4,6 +4,8 @@ kind: CronJob metadata: name: nextcloud-mail-sync namespace: nextcloud + labels: + atlas.bstein.dev/glue: "true" spec: schedule: "0 5 * * *" concurrencyPolicy: Forbid @@ -12,52 +14,61 @@ spec: jobTemplate: spec: template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "nextcloud" + vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db" + vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: | + {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} + export POSTGRES_DB="{{ .Data.data.database }}" + export POSTGRES_USER="{{ index .Data.data "db-username" }}" + export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} + export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" + export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" + {{ end }} + export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}" + export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}" + {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }} + export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}" + export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export SMTP_NAME="{{ index .Data.data 
"apikey" }}" + export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KC_ADMIN_USER="{{ .Data.data.username }}" + export KC_ADMIN_PASS="{{ .Data.data.password }}" + {{ end }} spec: + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" restartPolicy: OnFailure securityContext: runAsUser: 0 runAsGroup: 0 + serviceAccountName: nextcloud-vault containers: - name: mail-sync image: nextcloud:29-apache imagePullPolicy: IfNotPresent command: - - /bin/bash - - /sync/sync.sh + - /bin/sh + - -c env: - name: KC_BASE - value: https://sso.bstein.dev + value: http://keycloak.sso.svc.cluster.local - name: KC_REALM value: atlas - - name: KC_ADMIN_USER - valueFrom: - secretKeyRef: - name: nextcloud-keycloak-admin - key: username - - name: KC_ADMIN_PASS - valueFrom: - secretKeyRef: - name: nextcloud-keycloak-admin - key: password - name: MAILU_DOMAIN value: bstein.dev - name: POSTGRES_HOST value: postgres-service.postgres.svc.cluster.local - - name: POSTGRES_DB - valueFrom: - secretKeyRef: - name: nextcloud-db - key: database - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-username - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-password resources: requests: cpu: 100m @@ -77,6 +88,11 @@ spec: - name: sync-script mountPath: /sync/sync.sh subPath: sync.sh + args: + - | + set -eu + . 
/vault/secrets/nextcloud-env.sh + exec /sync/sync.sh volumes: - name: nextcloud-config-pvc persistentVolumeClaim: diff --git a/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh b/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh index 6c883fc..732b9fb 100755 --- a/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh +++ b/services/nextcloud-mail-sync/scripts/nextcloud-mail-sync.sh @@ -54,38 +54,25 @@ list_mail_accounts() { local export_out # Nextcloud Mail does not provide a list command; export is safe (does not print passwords). - # Some occ commands emit to stderr; capture both streams so we don't mis-detect "no accounts". - if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}" 2>&1); then + if ! export_out=$(/usr/sbin/runuser -u www-data -- php occ mail:account:export "${user_id}"); then echo "WARN: unable to export mail accounts for ${user_id}; skipping sync for safety" >&2 return 1 fi - # The export output is human-readable and includes blocks like: - # Account 10: - # - E-Mail: user@example.com - # Extract "account-id email" pairs. - awk ' - /^Account[[:space:]]+[0-9]+:/ { - id=$2; - sub(/:$/, "", id); - next; - } - id != "" && /@/ { - # Keep the regex simple (mawk does not support interval expressions like {2,}). 
- if (match($0, /[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+/)) { - printf("%s\t%s\n", id, substr($0, RSTART, RLENGTH)); - id=""; - } - } + awk -v OFS='\t' ' + BEGIN { IGNORECASE=1; id="" } + $1 == "Account" { id=$2; sub(":", "", id); next } + $1 == "-" && tolower($2) ~ /^e-?mail:$/ { if (id) print id, $3 } ' <<<"${export_out}" | sort -u } token=$( - curl -s -d "grant_type=password" \ - -d "client_id=admin-cli" \ - -d "username=${KC_ADMIN_USER}" \ - -d "password=${KC_ADMIN_PASS}" \ - "${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token' + curl -fsS \ + --data-urlencode "grant_type=password" \ + --data-urlencode "client_id=admin-cli" \ + --data-urlencode "username=${KC_ADMIN_USER}" \ + --data-urlencode "password=${KC_ADMIN_PASS}" \ + "${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token // empty' ) if [[ -z "${token}" || "${token}" == "null" ]]; then @@ -95,13 +82,17 @@ fi cd /var/www/html -kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000" +kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000&briefRepresentation=false" if [[ -n "${ONLY_USERNAME}" ]]; then username_q=$(jq -nr --arg v "${ONLY_USERNAME}" '$v|@uri') - kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1" + kc_users_url="${KC_BASE}/admin/realms/${KC_REALM}/users?username=${username_q}&exact=true&max=1&briefRepresentation=false" fi -users=$(curl -s -H "Authorization: Bearer ${token}" "${kc_users_url}") +users=$(curl -fsS -H "Authorization: Bearer ${token}" "${kc_users_url}") +if ! 
jq -e 'type == "array"' >/dev/null 2>&1 <<<"${users}"; then + echo "ERROR: Keycloak user list is not an array; aborting sync" >&2 + exit 1 +fi kc_set_user_mail_meta() { local user_id="${1}" diff --git a/services/nextcloud/collabora.yaml b/services/nextcloud/collabora.yaml index 1cda2ea..8a87821 100644 --- a/services/nextcloud/collabora.yaml +++ b/services/nextcloud/collabora.yaml @@ -20,7 +20,7 @@ spec: hardware: rpi5 containers: - name: collabora - image: collabora/code:latest + image: collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26 imagePullPolicy: IfNotPresent env: - name: domain @@ -61,6 +61,7 @@ metadata: annotations: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: tls: - hosts: diff --git a/services/nextcloud/configmap.yaml b/services/nextcloud/configmap.yaml index 21098a2..7fd2ba9 100644 --- a/services/nextcloud/configmap.yaml +++ b/services/nextcloud/configmap.yaml @@ -24,7 +24,7 @@ data: 'mail_smtpauth' => true, 'mail_smtpauthtype' => 'LOGIN', 'mail_domain' => 'bstein.dev', - 'mail_from_address' => 'no-reply', + 'mail_from_address' => 'no-reply-nextcloud', 'datadirectory' => '/var/www/html/data', 'apps_paths' => array ( diff --git a/services/nextcloud/deployment.yaml b/services/nextcloud/deployment.yaml index 295435e..82f7538 100644 --- a/services/nextcloud/deployment.yaml +++ b/services/nextcloud/deployment.yaml @@ -15,6 +15,35 @@ spec: metadata: labels: app: nextcloud + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-init-first: "true" + vault.hashicorp.com/role: "nextcloud" + vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db" + vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: | + {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} + export POSTGRES_DB="{{ .Data.data.database }}" + export 
POSTGRES_USER="{{ index .Data.data "db-username" }}" + export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} + export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" + export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" + {{ end }} + export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}" + export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}" + {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }} + export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}" + export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export SMTP_NAME="{{ index .Data.data "apikey" }}" + export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export KC_ADMIN_USER="{{ .Data.data.username }}" + export KC_ADMIN_PASS="{{ .Data.data.password }}" + {{ end }} spec: nodeSelector: hardware: rpi5 @@ -22,6 +51,7 @@ spec: fsGroup: 33 runAsUser: 33 runAsGroup: 33 + serviceAccountName: nextcloud-vault initContainers: - name: seed-nextcloud-web image: nextcloud:29-apache @@ -80,6 +110,7 @@ spec: command: ["/bin/sh", "-c"] args: - | + . /vault/secrets/nextcloud-env.sh installed="$(su -s /bin/sh www-data -c "php /var/www/html/occ status" 2>/dev/null | awk '/installed:/{print $3}' || true)" if [ ! 
-s /var/www/html/config/config.php ]; then su -s /bin/sh www-data -c "php /var/www/html/occ maintenance:install --database pgsql --database-host \"${POSTGRES_HOST}\" --database-name \"${POSTGRES_DB}\" --database-user \"${POSTGRES_USER}\" --database-pass \"${POSTGRES_PASSWORD}\" --admin-user \"${NEXTCLOUD_ADMIN_USER}\" --admin-pass \"${NEXTCLOUD_ADMIN_PASSWORD}\" --data-dir /var/www/html/data" @@ -150,41 +181,6 @@ spec: env: - name: POSTGRES_HOST value: postgres-service.postgres.svc.cluster.local - - name: POSTGRES_DB - valueFrom: - secretKeyRef: - name: nextcloud-db - key: database - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-username - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-password - - name: NEXTCLOUD_ADMIN_USER - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-user - - name: NEXTCLOUD_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-password - - name: OIDC_CLIENT_ID - valueFrom: - secretKeyRef: - name: nextcloud-oidc - key: client-id - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: nextcloud-oidc - key: client-secret volumeMounts: - name: nextcloud-web mountPath: /var/www/html @@ -201,36 +197,16 @@ spec: - name: nextcloud image: nextcloud:29-apache imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - >- + . 
/vault/secrets/nextcloud-env.sh + && exec /entrypoint.sh apache2-foreground env: # DB (external secret required: nextcloud-db with keys username,password,database) - name: POSTGRES_HOST value: postgres-service.postgres.svc.cluster.local - - name: POSTGRES_DB - valueFrom: - secretKeyRef: - name: nextcloud-db - key: database - - name: POSTGRES_USER - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-username - - name: POSTGRES_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-db - key: db-password # Admin bootstrap (external secret: nextcloud-admin with keys admin-user, admin-password) - - name: NEXTCLOUD_ADMIN_USER - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-user - - name: NEXTCLOUD_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-password - name: NEXTCLOUD_TRUSTED_DOMAINS value: cloud.bstein.dev - name: OVERWRITEHOST @@ -246,31 +222,11 @@ spec: value: "587" - name: SMTP_SECURE value: tls - - name: SMTP_NAME - valueFrom: - secretKeyRef: - name: nextcloud-smtp - key: smtp-username - - name: SMTP_PASSWORD - valueFrom: - secretKeyRef: - name: nextcloud-smtp - key: smtp-password - name: MAIL_FROM_ADDRESS - value: no-reply + value: no-reply-nextcloud - name: MAIL_DOMAIN value: bstein.dev # OIDC (external secret: nextcloud-oidc with keys client-id, client-secret) - - name: OIDC_CLIENT_ID - valueFrom: - secretKeyRef: - name: nextcloud-oidc - key: client-id - - name: OIDC_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: nextcloud-oidc - key: client-secret - name: NEXTCLOUD_UPDATE value: "1" - name: APP_INSTALL diff --git a/services/nextcloud/ingress.yaml b/services/nextcloud/ingress.yaml index 1c60282..0df2660 100644 --- a/services/nextcloud/ingress.yaml +++ b/services/nextcloud/ingress.yaml @@ -7,6 +7,7 @@ metadata: annotations: cert-manager.io/cluster-issuer: letsencrypt-prod traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: tls: - 
hosts: diff --git a/services/nextcloud/kustomization.yaml b/services/nextcloud/kustomization.yaml index 14e0ec1..ebaeaaf 100644 --- a/services/nextcloud/kustomization.yaml +++ b/services/nextcloud/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: nextcloud resources: - namespace.yaml + - serviceaccount.yaml - configmap.yaml - pvc.yaml - deployment.yaml diff --git a/services/nextcloud/maintenance-cronjob.yaml b/services/nextcloud/maintenance-cronjob.yaml index 618f548..d4008c7 100644 --- a/services/nextcloud/maintenance-cronjob.yaml +++ b/services/nextcloud/maintenance-cronjob.yaml @@ -10,29 +10,55 @@ spec: jobTemplate: spec: template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "nextcloud" + vault.hashicorp.com/agent-inject-secret-nextcloud-env.sh: "kv/data/atlas/nextcloud/nextcloud-db" + vault.hashicorp.com/agent-inject-template-nextcloud-env.sh: | + {{ with secret "kv/data/atlas/nextcloud/nextcloud-db" }} + export POSTGRES_DB="{{ .Data.data.database }}" + export POSTGRES_USER="{{ index .Data.data "db-username" }}" + export POSTGRES_PASSWORD="{{ index .Data.data "db-password" }}" + {{ end }} + {{ with secret "kv/data/atlas/nextcloud/nextcloud-admin" }} + export NEXTCLOUD_ADMIN_USER="{{ index .Data.data "admin-user" }}" + export NEXTCLOUD_ADMIN_PASSWORD="{{ index .Data.data "admin-password" }}" + {{ end }} + export ADMIN_USER="${NEXTCLOUD_ADMIN_USER}" + export ADMIN_PASS="${NEXTCLOUD_ADMIN_PASSWORD}" + {{ with secret "kv/data/atlas/nextcloud/nextcloud-oidc" }} + export OIDC_CLIENT_ID="{{ index .Data.data "client-id" }}" + export OIDC_CLIENT_SECRET="{{ index .Data.data "client-secret" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export SMTP_NAME="{{ index .Data.data "apikey" }}" + export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} + {{ with secret "kv/data/atlas/shared/keycloak-admin" }} + export 
KC_ADMIN_USER="{{ .Data.data.username }}" + export KC_ADMIN_PASS="{{ .Data.data.password }}" + {{ end }} spec: restartPolicy: OnFailure securityContext: runAsUser: 0 runAsGroup: 0 + serviceAccountName: nextcloud-vault containers: - name: maintenance image: nextcloud:29-apache imagePullPolicy: IfNotPresent - command: ["/bin/bash", "/maintenance/maintenance.sh"] + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . /vault/secrets/nextcloud-env.sh + exec /maintenance/maintenance.sh env: - name: NC_URL value: https://cloud.bstein.dev - - name: ADMIN_USER - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-user - - name: ADMIN_PASS - valueFrom: - secretKeyRef: - name: nextcloud-admin - key: admin-password volumeMounts: - name: nextcloud-web mountPath: /var/www/html diff --git a/services/nextcloud/serviceaccount.yaml b/services/nextcloud/serviceaccount.yaml new file mode 100644 index 0000000..c97cd5b --- /dev/null +++ b/services/nextcloud/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/nextcloud/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nextcloud-vault + namespace: nextcloud diff --git a/services/oauth2-proxy/deployment.yaml b/services/oauth2-proxy/deployment.yaml index 7c22a93..4af5ab1 100644 --- a/services/oauth2-proxy/deployment.yaml +++ b/services/oauth2-proxy/deployment.yaml @@ -15,7 +15,20 @@ spec: metadata: labels: app: oauth2-proxy + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-oidc-env: "kv/data/atlas/sso/oauth2-proxy-oidc" + vault.hashicorp.com/agent-inject-template-oidc-env: | + {{- with secret "kv/data/atlas/sso/oauth2-proxy-oidc" -}} + export OAUTH2_PROXY_CLIENT_ID="{{ .Data.data.client_id }}" + export OAUTH2_PROXY_CLIENT_SECRET="{{ .Data.data.client_secret }}" + export OAUTH2_PROXY_COOKIE_SECRET="{{ .Data.data.cookie_secret }}" + {{- end -}} spec: + serviceAccountName: sso-vault + imagePullSecrets: + - name: 
harbor-regcred nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ -29,9 +42,11 @@ spec: values: ["rpi5","rpi4"] containers: - name: oauth2-proxy - image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 + image: registry.bstein.dev/tools/oauth2-proxy-vault:v7.6.0 imagePullPolicy: IfNotPresent + command: ["/entrypoint.sh"] args: + - /bin/oauth2-proxy - --provider=oidc - --redirect-url=https://auth.bstein.dev/oauth2/callback - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas @@ -50,21 +65,8 @@ spec: - --skip-jwt-bearer-tokens=true - --oidc-groups-claim=groups env: - - name: OAUTH2_PROXY_CLIENT_ID - valueFrom: - secretKeyRef: - name: oauth2-proxy-oidc - key: client_id - - name: OAUTH2_PROXY_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-oidc - key: client_secret - - name: OAUTH2_PROXY_COOKIE_SECRET - valueFrom: - secretKeyRef: - name: oauth2-proxy-oidc - key: cookie_secret + - name: VAULT_ENV_FILE + value: /vault/secrets/oidc-env ports: - containerPort: 4180 name: http diff --git a/services/oauth2-proxy/ingress.yaml b/services/oauth2-proxy/ingress.yaml index 0f5830c..39f71da 100644 --- a/services/oauth2-proxy/ingress.yaml +++ b/services/oauth2-proxy/ingress.yaml @@ -7,6 +7,8 @@ metadata: annotations: cert-manager.io/cluster-issuer: letsencrypt traefik.ingress.kubernetes.io/router.middlewares: sso-oauth2-proxy-errors@kubernetescrd + traefik.ingress.kubernetes.io/router.entrypoints: websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: ingressClassName: traefik rules: diff --git a/services/openldap/statefulset.yaml b/services/openldap/statefulset.yaml index ee8c792..210d16e 100644 --- a/services/openldap/statefulset.yaml +++ b/services/openldap/statefulset.yaml @@ -16,14 +16,30 @@ spec: metadata: labels: app: openldap + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "sso" + vault.hashicorp.com/agent-inject-secret-openldap-env: "kv/data/atlas/sso/openldap-admin" + 
vault.hashicorp.com/agent-inject-template-openldap-env: | + {{- with secret "kv/data/atlas/sso/openldap-admin" -}} + export LDAP_ADMIN_PASSWORD="{{ .Data.data.LDAP_ADMIN_PASSWORD }}" + export LDAP_CONFIG_PASSWORD="{{ .Data.data.LDAP_CONFIG_PASSWORD }}" + {{- end -}} spec: nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" + serviceAccountName: sso-vault containers: - name: openldap image: docker.io/osixia/openldap:1.5.0 imagePullPolicy: IfNotPresent + command: ["/bin/sh", "-c"] + args: + - | + set -eu + . /vault/secrets/openldap-env + exec /usr/bin/python3 -u /container/tool/run ports: - name: ldap containerPort: 389 @@ -34,16 +50,6 @@ spec: value: Atlas - name: LDAP_DOMAIN value: bstein.dev - - name: LDAP_ADMIN_PASSWORD - valueFrom: - secretKeyRef: - name: openldap-admin - key: LDAP_ADMIN_PASSWORD - - name: LDAP_CONFIG_PASSWORD - valueFrom: - secretKeyRef: - name: openldap-admin - key: LDAP_CONFIG_PASSWORD readinessProbe: tcpSocket: port: ldap diff --git a/services/outline/deployment.yaml b/services/outline/deployment.yaml index 9f8160e..80a81dd 100644 --- a/services/outline/deployment.yaml +++ b/services/outline/deployment.yaml @@ -20,7 +20,36 @@ spec: metadata: labels: app: outline + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "outline" + vault.hashicorp.com/agent-inject-secret-outline-env.sh: "kv/data/atlas/outline/outline-db" + vault.hashicorp.com/agent-inject-template-outline-env.sh: | + {{ with secret "kv/data/atlas/outline/outline-db" }} + export DATABASE_URL="{{ .Data.data.DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/outline/outline-secrets" }} + export SECRET_KEY="{{ .Data.data.SECRET_KEY }}" + export UTILS_SECRET="{{ .Data.data.UTILS_SECRET }}" + {{ end }} + {{ with secret "kv/data/atlas/outline/outline-oidc" }} + export OIDC_AUTH_URI="{{ .Data.data.OIDC_AUTH_URI }}" + export OIDC_CLIENT_ID="{{ .Data.data.OIDC_CLIENT_ID }}" + export OIDC_CLIENT_SECRET="{{ 
.Data.data.OIDC_CLIENT_SECRET }}" + export OIDC_LOGOUT_URI="{{ .Data.data.OIDC_LOGOUT_URI }}" + export OIDC_TOKEN_URI="{{ .Data.data.OIDC_TOKEN_URI }}" + export OIDC_USERINFO_URI="{{ .Data.data.OIDC_USERINFO_URI }}" + {{ end }} + {{ with secret "kv/data/atlas/outline/outline-smtp" }} + export SMTP_HOST="{{ .Data.data.SMTP_HOST }}" + {{ end }} + export SMTP_FROM_EMAIL="no-reply-outline@bstein.dev" + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export SMTP_USERNAME="{{ index .Data.data "apikey" }}" + export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: + serviceAccountName: outline-vault nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ -34,6 +63,11 @@ spec: containers: - name: outline image: outlinewiki/outline:1.2.0 + command: + - /bin/sh + - -c + args: + - . /vault/secrets/outline-env.sh && exec node build/server/index.js ports: - name: http containerPort: 3000 @@ -65,16 +99,7 @@ spec: - name: SMTP_SECURE value: "false" - name: SMTP_PORT - value: "25" - envFrom: - - secretRef: - name: outline-db - - secretRef: - name: outline-secrets - - secretRef: - name: outline-oidc - - secretRef: - name: outline-smtp + value: "587" volumeMounts: - name: user-data mountPath: /var/lib/outline/data diff --git a/services/outline/kustomization.yaml b/services/outline/kustomization.yaml index 33640f6..2fd0ae5 100644 --- a/services/outline/kustomization.yaml +++ b/services/outline/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: outline resources: - namespace.yaml + - serviceaccount.yaml - user-pvc.yaml - redis-deployment.yaml - redis-service.yaml diff --git a/services/outline/serviceaccount.yaml b/services/outline/serviceaccount.yaml new file mode 100644 index 0000000..8f15c78 --- /dev/null +++ b/services/outline/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/outline/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: outline-vault + namespace: outline diff --git 
a/services/pegasus/deployment.yaml b/services/pegasus/deployment.yaml index 34270b0..bc3db70 100644 --- a/services/pegasus/deployment.yaml +++ b/services/pegasus/deployment.yaml @@ -1,3 +1,4 @@ +# services/pegasus/deployment.yaml apiVersion: apps/v1 kind: Deployment metadata: @@ -13,11 +14,25 @@ spec: maxUnavailable: 1 selector: { matchLabels: { app: pegasus } } template: - metadata: { labels: { app: pegasus } } + metadata: + labels: { app: pegasus } + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "pegasus" + vault.hashicorp.com/agent-inject-secret-pegasus-env: "kv/data/atlas/pegasus/pegasus-secrets" + vault.hashicorp.com/agent-inject-template-pegasus-env: | + {{- with secret "kv/data/atlas/pegasus/pegasus-secrets" -}} + export PEGASUS_SESSION_KEY="{{ .Data.data.PEGASUS_SESSION_KEY }}" + export JELLYFIN_URL="{{ .Data.data.JELLYFIN_URL }}" + export JELLYFIN_API_KEY="{{ .Data.data.JELLYFIN_API_KEY }}" + {{- end -}} spec: nodeSelector: kubernetes.io/arch: arm64 node-role.kubernetes.io/worker: "true" + serviceAccountName: pegasus-vault-sync + imagePullSecrets: + - name: harbor-regcred securityContext: runAsNonRoot: true runAsUser: 65532 @@ -57,9 +72,8 @@ spec: containers: - name: pegasus - image: registry.bstein.dev/streaming/pegasus:1.2.32 # {"$imagepolicy": "jellyfin:pegasus"} + image: registry.bstein.dev/streaming/pegasus-vault:1.2.32 # {"$imagepolicy": "jellyfin:pegasus"} imagePullPolicy: Always - command: ["/pegasus"] env: - name: PEGASUS_MEDIA_ROOT valueFrom: { configMapKeyRef: { name: pegasus-config, key: PEGASUS_MEDIA_ROOT } } @@ -67,12 +81,8 @@ spec: valueFrom: { configMapKeyRef: { name: pegasus-config, key: PEGASUS_BIND } } - name: PEGASUS_USER_MAP_FILE value: "/config/user-map.yaml" - - name: PEGASUS_SESSION_KEY - valueFrom: { secretKeyRef: { name: pegasus-secrets, key: PEGASUS_SESSION_KEY } } - - name: JELLYFIN_URL - valueFrom: { secretKeyRef: { name: pegasus-secrets, key: JELLYFIN_URL } } - - name: JELLYFIN_API_KEY - 
valueFrom: { secretKeyRef: { name: pegasus-secrets, key: JELLYFIN_API_KEY } } + - name: VAULT_ENV_FILE + value: /vault/secrets/pegasus-env - name: PEGASUS_DEBUG value: "1" - name: PEGASUS_DRY_RUN diff --git a/services/pegasus/image.yaml b/services/pegasus/image.yaml index 682ec83..5987815 100644 --- a/services/pegasus/image.yaml +++ b/services/pegasus/image.yaml @@ -5,7 +5,7 @@ metadata: name: pegasus namespace: jellyfin spec: - image: registry.bstein.dev/streaming/pegasus + image: registry.bstein.dev/streaming/pegasus-vault interval: 1m0s --- diff --git a/services/pegasus/kustomization.yaml b/services/pegasus/kustomization.yaml index 5902595..bef2b40 100644 --- a/services/pegasus/kustomization.yaml +++ b/services/pegasus/kustomization.yaml @@ -3,8 +3,11 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - configmap.yaml + - vault-serviceaccount.yaml + - secretproviderclass.yaml - service.yaml - deployment.yaml + - vault-sync-deployment.yaml - ingress.yaml patches: - target: { kind: Deployment, name: pegasus, namespace: jellyfin } diff --git a/services/pegasus/secretproviderclass.yaml b/services/pegasus/secretproviderclass.yaml new file mode 100644 index 0000000..b4621a5 --- /dev/null +++ b/services/pegasus/secretproviderclass.yaml @@ -0,0 +1,21 @@ +# services/pegasus/secretproviderclass.yaml +apiVersion: secrets-store.csi.x-k8s.io/v1 +kind: SecretProviderClass +metadata: + name: pegasus-vault + namespace: jellyfin +spec: + provider: vault + parameters: + vaultAddress: "http://vault.vault.svc.cluster.local:8200" + roleName: "pegasus" + objects: | + - objectName: "harbor-pull__dockerconfigjson" + secretPath: "kv/data/atlas/harbor-pull/jellyfin" + secretKey: "dockerconfigjson" + secretObjects: + - secretName: harbor-regcred + type: kubernetes.io/dockerconfigjson + data: + - objectName: harbor-pull__dockerconfigjson + key: .dockerconfigjson diff --git a/services/pegasus/vault-serviceaccount.yaml b/services/pegasus/vault-serviceaccount.yaml 
new file mode 100644 index 0000000..ed56930 --- /dev/null +++ b/services/pegasus/vault-serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/pegasus/vault-serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: pegasus-vault-sync + namespace: jellyfin diff --git a/services/pegasus/vault-sync-deployment.yaml b/services/pegasus/vault-sync-deployment.yaml new file mode 100644 index 0000000..6128d8d --- /dev/null +++ b/services/pegasus/vault-sync-deployment.yaml @@ -0,0 +1,34 @@ +# services/pegasus/vault-sync-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: pegasus-vault-sync + namespace: jellyfin +spec: + replicas: 1 + selector: + matchLabels: + app: pegasus-vault-sync + template: + metadata: + labels: + app: pegasus-vault-sync + spec: + serviceAccountName: pegasus-vault-sync + containers: + - name: sync + image: alpine:3.20 + command: ["/bin/sh", "-c"] + args: + - "sleep infinity" + volumeMounts: + - name: vault-secrets + mountPath: /vault/secrets + readOnly: true + volumes: + - name: vault-secrets + csi: + driver: secrets-store.csi.k8s.io + readOnly: true + volumeAttributes: + secretProviderClass: pegasus-vault diff --git a/services/planka/deployment.yaml b/services/planka/deployment.yaml index 9524245..9750039 100644 --- a/services/planka/deployment.yaml +++ b/services/planka/deployment.yaml @@ -20,7 +20,39 @@ spec: metadata: labels: app: planka + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/role: "planka" + vault.hashicorp.com/agent-inject-secret-planka-env.sh: "kv/data/atlas/planka/planka-db" + vault.hashicorp.com/agent-inject-template-planka-env.sh: | + {{ with secret "kv/data/atlas/planka/planka-db" }} + export DATABASE_URL="{{ .Data.data.DATABASE_URL }}" + {{ end }} + {{ with secret "kv/data/atlas/planka/planka-secrets" }} + export SECRET_KEY="{{ .Data.data.SECRET_KEY }}" + {{ end }} + {{ with secret "kv/data/atlas/planka/planka-oidc" }} + export OIDC_CLIENT_ID="{{ 
.Data.data.OIDC_CLIENT_ID }}" + export OIDC_CLIENT_SECRET="{{ .Data.data.OIDC_CLIENT_SECRET }}" + export OIDC_ENFORCED="{{ .Data.data.OIDC_ENFORCED }}" + export OIDC_IGNORE_ROLES="{{ .Data.data.OIDC_IGNORE_ROLES }}" + export OIDC_ISSUER="{{ .Data.data.OIDC_ISSUER }}" + export OIDC_SCOPES="{{ .Data.data.OIDC_SCOPES }}" + export OIDC_USE_OAUTH_CALLBACK="{{ .Data.data.OIDC_USE_OAUTH_CALLBACK }}" + {{ end }} + {{ with secret "kv/data/atlas/planka/planka-smtp" }} + export SMTP_HOST="{{ .Data.data.SMTP_HOST }}" + export SMTP_PORT="{{ .Data.data.SMTP_PORT }}" + export SMTP_SECURE="{{ .Data.data.SMTP_SECURE }}" + export SMTP_TLS_REJECT_UNAUTHORIZED="{{ .Data.data.SMTP_TLS_REJECT_UNAUTHORIZED }}" + {{ end }} + export SMTP_FROM="no-reply-planka@bstein.dev" + {{ with secret "kv/data/atlas/shared/postmark-relay" }} + export SMTP_USER="{{ index .Data.data "apikey" }}" + export SMTP_PASSWORD="{{ index .Data.data "apikey" }}" + {{ end }} spec: + serviceAccountName: planka-vault nodeSelector: node-role.kubernetes.io/worker: "true" affinity: @@ -58,6 +90,11 @@ spec: containers: - name: planka image: ghcr.io/plankanban/planka:2.0.0-rc.4 + command: + - /bin/sh + - -c + args: + - . 
/vault/secrets/planka-env.sh && exec node app.js --prod ports: - name: http containerPort: 1337 @@ -66,23 +103,12 @@ spec: value: https://tasks.bstein.dev - name: TRUST_PROXY value: "true" - - name: OIDC_IGNORE_ROLES - value: "false" - name: OIDC_ADMIN_ROLES value: admin - name: OIDC_PROJECT_OWNER_ROLES - value: planka-users + value: "*" - name: OIDC_ROLES_ATTRIBUTE value: groups - envFrom: - - secretRef: - name: planka-db - - secretRef: - name: planka-secrets - - secretRef: - name: planka-oidc - - secretRef: - name: planka-smtp volumeMounts: - name: user-data mountPath: /app/public/user-avatars diff --git a/services/planka/kustomization.yaml b/services/planka/kustomization.yaml index ab42954..db19e6e 100644 --- a/services/planka/kustomization.yaml +++ b/services/planka/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: planka resources: - namespace.yaml + - serviceaccount.yaml - user-data-pvc.yaml - app-pvc.yaml - deployment.yaml diff --git a/services/planka/serviceaccount.yaml b/services/planka/serviceaccount.yaml new file mode 100644 index 0000000..ca4f437 --- /dev/null +++ b/services/planka/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/planka/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: planka-vault + namespace: planka diff --git a/services/sui-metrics/overlays/atlas/patch-node-selector.yaml b/services/sui-metrics/overlays/atlas/patch-node-selector.yaml index e97ccb7..d4f3360 100644 --- a/services/sui-metrics/overlays/atlas/patch-node-selector.yaml +++ b/services/sui-metrics/overlays/atlas/patch-node-selector.yaml @@ -8,4 +8,4 @@ spec: template: spec: nodeSelector: - kubernetes.io/hostname: titan-24 + hardware: rpi5 diff --git a/services/vault/ingress.yaml b/services/vault/ingress.yaml index 1d9d523..b768381 100644 --- a/services/vault/ingress.yaml +++ b/services/vault/ingress.yaml @@ -7,6 +7,7 @@ metadata: annotations: kubernetes.io/ingress.class: traefik traefik.ingress.kubernetes.io/router.entrypoints: 
websecure + traefik.ingress.kubernetes.io/router.tls: "true" spec: ingressClassName: traefik tls: diff --git a/services/vault/k8s-auth-config-cronjob.yaml b/services/vault/k8s-auth-config-cronjob.yaml new file mode 100644 index 0000000..29e8e80 --- /dev/null +++ b/services/vault/k8s-auth-config-cronjob.yaml @@ -0,0 +1,54 @@ +# services/vault/k8s-auth-config-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vault-k8s-auth-config + namespace: vault + labels: + atlas.bstein.dev/glue: "true" +spec: + schedule: "*/15 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + spec: + serviceAccountName: vault-admin + restartPolicy: Never + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: configure-k8s-auth + image: hashicorp/vault:1.17.6 + imagePullPolicy: IfNotPresent + command: + - sh + - /scripts/vault_k8s_auth_configure.sh + env: + - name: VAULT_ADDR + value: http://10.43.57.249:8200 + - name: VAULT_K8S_ROLE + value: vault-admin + - name: VAULT_K8S_TOKEN_REVIEWER_JWT_FILE + value: /var/run/secrets/vault-token-reviewer/token + - name: VAULT_K8S_ROLE_TTL + value: 1h + volumeMounts: + - name: k8s-auth-config-script + mountPath: /scripts + readOnly: true + - name: token-reviewer + mountPath: /var/run/secrets/vault-token-reviewer + readOnly: true + volumes: + - name: k8s-auth-config-script + configMap: + name: vault-k8s-auth-config-script + defaultMode: 0555 + - name: token-reviewer + secret: + secretName: vault-admin-token-reviewer diff --git a/services/vault/kustomization.yaml b/services/vault/kustomization.yaml index b39fc48..060077b 100644 --- a/services/vault/kustomization.yaml +++ b/services/vault/kustomization.yaml @@ -5,10 +5,26 @@ namespace: vault resources: - namespace.yaml - serviceaccount.yaml + - serviceaccount-admin.yaml + - token-reviewer-secret.yaml - rbac.yaml - configmap.yaml - 
statefulset.yaml + - k8s-auth-config-cronjob.yaml + - oidc-config-cronjob.yaml - service.yaml - ingress.yaml - certificate.yaml - serverstransport.yaml +generatorOptions: + disableNameSuffixHash: true +configMapGenerator: + - name: vault-oidc-config-script + files: + - vault_oidc_configure.sh=scripts/vault_oidc_configure.sh + - name: vault-k8s-auth-config-script + files: + - vault_k8s_auth_configure.sh=scripts/vault_k8s_auth_configure.sh + - name: vault-entrypoint + files: + - vault-entrypoint.sh=scripts/vault-entrypoint.sh diff --git a/services/vault/oidc-config-cronjob.yaml b/services/vault/oidc-config-cronjob.yaml new file mode 100644 index 0000000..013c9f3 --- /dev/null +++ b/services/vault/oidc-config-cronjob.yaml @@ -0,0 +1,82 @@ +# services/vault/oidc-config-cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vault-oidc-config + namespace: vault + labels: + atlas.bstein.dev/glue: "true" +spec: + schedule: "*/15 * * * *" + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 1 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 1 + template: + metadata: + annotations: + vault.hashicorp.com/agent-inject: "true" + vault.hashicorp.com/agent-pre-populate-only: "true" + vault.hashicorp.com/role: "vault-admin" + vault.hashicorp.com/agent-inject-secret-vault-oidc-env.sh: "kv/data/atlas/vault/vault-oidc-config" + vault.hashicorp.com/agent-inject-template-vault-oidc-env.sh: | + {{ with secret "kv/data/atlas/vault/vault-oidc-config" }} + export VAULT_OIDC_DISCOVERY_URL="{{ .Data.data.discovery_url }}" + export VAULT_OIDC_CLIENT_ID="{{ .Data.data.client_id }}" + export VAULT_OIDC_CLIENT_SECRET="{{ .Data.data.client_secret }}" + export VAULT_OIDC_DEFAULT_ROLE="{{ .Data.data.default_role }}" + export VAULT_OIDC_SCOPES="{{ .Data.data.scopes }}" + export VAULT_OIDC_USER_CLAIM="{{ .Data.data.user_claim }}" + export VAULT_OIDC_GROUPS_CLAIM="{{ .Data.data.groups_claim }}" + export VAULT_OIDC_TOKEN_POLICIES="{{ .Data.data.token_policies }}" + 
export VAULT_OIDC_ADMIN_GROUP="{{ .Data.data.admin_group }}" + export VAULT_OIDC_ADMIN_POLICIES="{{ .Data.data.admin_policies }}" + export VAULT_OIDC_DEV_GROUP="{{ .Data.data.dev_group }}" + export VAULT_OIDC_DEV_POLICIES="{{ .Data.data.dev_policies }}" + export VAULT_OIDC_USER_GROUP="{{ .Data.data.user_group }}" + export VAULT_OIDC_USER_POLICIES="{{ .Data.data.user_policies }}" + export VAULT_OIDC_REDIRECT_URIS="{{ .Data.data.redirect_uris }}" + export VAULT_OIDC_BOUND_AUDIENCES="{{ .Data.data.bound_audiences }}" + export VAULT_OIDC_BOUND_CLAIMS="{{ .Data.data.bound_claims }}" + export VAULT_OIDC_BOUND_CLAIMS_TYPE="{{ .Data.data.bound_claims_type }}" + {{ end }} + spec: + serviceAccountName: vault-admin + restartPolicy: Never + nodeSelector: + kubernetes.io/arch: arm64 + node-role.kubernetes.io/worker: "true" + containers: + - name: configure-oidc + image: hashicorp/vault:1.17.6 + imagePullPolicy: IfNotPresent + command: + - /entrypoint.sh + args: + - sh + - /scripts/vault_oidc_configure.sh + env: + - name: VAULT_ADDR + value: http://10.43.57.249:8200 + - name: VAULT_K8S_ROLE + value: vault-admin + - name: VAULT_ENV_FILE + value: /vault/secrets/vault-oidc-env.sh + volumeMounts: + - name: vault-entrypoint + mountPath: /entrypoint.sh + subPath: vault-entrypoint.sh + - name: oidc-config-script + mountPath: /scripts + readOnly: true + volumes: + - name: vault-entrypoint + configMap: + name: vault-entrypoint + defaultMode: 493 + - name: oidc-config-script + configMap: + name: vault-oidc-config-script + defaultMode: 0555 diff --git a/services/vault/rbac.yaml b/services/vault/rbac.yaml index d1caa18..01dc405 100644 --- a/services/vault/rbac.yaml +++ b/services/vault/rbac.yaml @@ -11,3 +11,6 @@ subjects: - kind: ServiceAccount name: vault namespace: vault + - kind: ServiceAccount + name: vault-admin + namespace: vault diff --git a/services/vault/scripts/vault-entrypoint.sh b/services/vault/scripts/vault-entrypoint.sh new file mode 100644 index 0000000..fa3b791 --- 
/dev/null +++ b/services/vault/scripts/vault-entrypoint.sh @@ -0,0 +1,34 @@ +#!/bin/sh +set -eu + +if [ -n "${VAULT_ENV_FILE:-}" ]; then + if [ -f "${VAULT_ENV_FILE}" ]; then + # shellcheck disable=SC1090 + . "${VAULT_ENV_FILE}" + else + echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2 + exit 1 + fi +fi + +if [ -n "${VAULT_COPY_FILES:-}" ]; then + old_ifs="$IFS" + IFS=',' + for pair in ${VAULT_COPY_FILES}; do + src="${pair%%:*}" + dest="${pair#*:}" + if [ -z "${src}" ] || [ -z "${dest}" ]; then + echo "Vault copy entry malformed: ${pair}" >&2 + exit 1 + fi + if [ ! -f "${src}" ]; then + echo "Vault file not found: ${src}" >&2 + exit 1 + fi + mkdir -p "$(dirname "${dest}")" + cp "${src}" "${dest}" + done + IFS="$old_ifs" +fi + +exec "$@" diff --git a/services/vault/scripts/vault_k8s_auth_configure.sh b/services/vault/scripts/vault_k8s_auth_configure.sh new file mode 100644 index 0000000..202879f --- /dev/null +++ b/services/vault/scripts/vault_k8s_auth_configure.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env sh +set -eu + +log() { echo "[vault-k8s-auth] $*"; } + +vault_cmd() { + for attempt in 1 2 3 4 5 6; do + set +e + output="$(vault "$@" 2>&1)" + status=$? + set -e + if [ "${status}" -eq 0 ]; then + printf '%s' "${output}" + return 0 + fi + log "vault command failed; retrying (${attempt}/6)" + sleep $((attempt * 2)) + done + log "vault command failed; giving up" + return 1 +} + +ensure_token() { + if [ -n "${VAULT_TOKEN:-}" ]; then + return + fi + role="${VAULT_K8S_ROLE:-vault}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then + log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}" + exit 1 + fi + export VAULT_TOKEN +} + +if ! status_json="$(vault_cmd status -format=json)"; then + log "vault status failed; check VAULT_ADDR and VAULT_TOKEN" + exit 1 +fi + +if ! 
printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then + log "vault not initialized; skipping" + exit 0 +fi + +if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then + log "vault sealed; skipping" + exit 0 +fi + +ensure_token + +k8s_host="https://${KUBERNETES_SERVICE_HOST}:443" +k8s_ca="$(cat /var/run/secrets/kubernetes.io/serviceaccount/ca.crt)" +k8s_token="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" +role_ttl="${VAULT_K8S_ROLE_TTL:-1h}" +token_reviewer_jwt="${VAULT_K8S_TOKEN_REVIEWER_JWT:-}" + +if [ -z "${token_reviewer_jwt}" ] && [ -n "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE:-}" ] && [ -r "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}" ]; then + token_reviewer_jwt="$(cat "${VAULT_K8S_TOKEN_REVIEWER_JWT_FILE}")" +fi +if [ -z "${token_reviewer_jwt}" ]; then + token_reviewer_jwt="${k8s_token}" +fi + +if ! vault_cmd auth list -format=json | grep -q '"kubernetes/"'; then + log "enabling kubernetes auth" + vault_cmd auth enable kubernetes +fi + +log "configuring kubernetes auth" +vault_cmd write auth/kubernetes/config \ + token_reviewer_jwt="${token_reviewer_jwt}" \ + kubernetes_host="${k8s_host}" \ + kubernetes_ca_cert="${k8s_ca}" + +write_raw_policy() { + name="$1" + body="$2" + log "writing policy ${name}" + printf '%s\n' "${body}" | vault_cmd policy write "${name}" - +} + +write_policy_and_role() { + role="$1" + namespace="$2" + service_accounts="$3" + read_paths="$4" + write_paths="$5" + + policy_body="" + for path in ${read_paths}; do + policy_body="${policy_body} +path \"kv/data/atlas/${path}\" { + capabilities = [\"read\"] +} +path \"kv/metadata/atlas/${path}\" { + capabilities = [\"list\"] +} +" + done + for path in ${write_paths}; do + policy_body="${policy_body} +path \"kv/data/atlas/${path}\" { + capabilities = [\"create\", \"update\", \"read\"] +} +path \"kv/metadata/atlas/${path}\" { + capabilities = [\"list\"] +} +" + done + + log "writing policy ${role}" + printf '%s\n' "${policy_body}" | vault_cmd policy 
write "${role}" - + + log "writing role ${role}" + vault_cmd write "auth/kubernetes/role/${role}" \ + bound_service_account_names="${service_accounts}" \ + bound_service_account_namespaces="${namespace}" \ + policies="${role}" \ + ttl="${role_ttl}" +} + +vault_admin_policy=' +path "sys/auth" { + capabilities = ["read"] +} +path "sys/auth/*" { + capabilities = ["create", "update", "delete", "sudo", "read"] +} +path "auth/kubernetes/*" { + capabilities = ["create", "update", "read"] +} +path "auth/oidc/*" { + capabilities = ["create", "update", "read"] +} +path "sys/policies/acl" { + capabilities = ["list"] +} +path "sys/policies/acl/*" { + capabilities = ["create", "update", "read"] +} +path "sys/internal/ui/mounts" { + capabilities = ["read"] +} +path "sys/mounts" { + capabilities = ["read"] +} +path "sys/mounts/auth/*" { + capabilities = ["read", "update", "sudo"] +} +path "kv/data/atlas/vault/*" { + capabilities = ["read"] +} +path "kv/metadata/atlas/vault/*" { + capabilities = ["list"] +} +path "kv/data/*" { + capabilities = ["create", "update", "read", "delete", "patch"] +} +path "kv/metadata" { + capabilities = ["list"] +} +path "kv/metadata/*" { + capabilities = ["read", "list", "delete"] +} +path "kv/data/atlas/shared/*" { + capabilities = ["create", "update", "read", "patch"] +} +path "kv/metadata/atlas/shared/*" { + capabilities = ["list"] +} +' + +write_raw_policy "vault-admin" "${vault_admin_policy}" +dev_kv_policy=' +path "kv/metadata" { + capabilities = ["list"] +} +path "kv/metadata/atlas" { + capabilities = ["list"] +} +path "kv/metadata/atlas/shared" { + capabilities = ["list"] +} +path "kv/metadata/atlas/shared/*" { + capabilities = ["list"] +} +path "kv/data/atlas/shared/*" { + capabilities = ["read"] +} +' +write_raw_policy "dev-kv" "${dev_kv_policy}" +log "writing role vault-admin" +vault_cmd write "auth/kubernetes/role/vault-admin" \ + bound_service_account_names="vault-admin" \ + bound_service_account_namespaces="vault" \ + 
policies="vault-admin" \ + ttl="${role_ttl}" + +write_policy_and_role "outline" "outline" "outline-vault" \ + "outline/* shared/postmark-relay" "" +write_policy_and_role "planka" "planka" "planka-vault" \ + "planka/* shared/postmark-relay" "" +write_policy_and_role "bstein-dev-home" "bstein-dev-home" "bstein-dev-home,bstein-dev-home-vault-sync" \ + "portal/* shared/chat-ai-keys-runtime shared/portal-e2e-client shared/postmark-relay mailu/mailu-initial-account-secret harbor-pull/bstein-dev-home" "" +write_policy_and_role "gitea" "gitea" "gitea-vault" \ + "gitea/*" "" +write_policy_and_role "vaultwarden" "vaultwarden" "vaultwarden-vault" \ + "vaultwarden/* mailu/mailu-initial-account-secret" "" +write_policy_and_role "sso" "sso" "sso-vault,sso-vault-sync,mas-secrets-ensure" \ + "sso/* portal/bstein-dev-home-keycloak-admin shared/keycloak-admin shared/portal-e2e-client shared/postmark-relay harbor-pull/sso" "" +write_policy_and_role "mailu-mailserver" "mailu-mailserver" "mailu-vault-sync" \ + "mailu/* shared/postmark-relay harbor-pull/mailu-mailserver" "" +write_policy_and_role "harbor" "harbor" "harbor-vault-sync" \ + "harbor/* harbor-pull/harbor" "" +write_policy_and_role "nextcloud" "nextcloud" "nextcloud-vault" \ + "nextcloud/* shared/keycloak-admin shared/postmark-relay" "" +write_policy_and_role "comms" "comms" "comms-vault,atlasbot" \ + "comms/* shared/chat-ai-keys-runtime harbor-pull/comms" "" +write_policy_and_role "jenkins" "jenkins" "jenkins" \ + "jenkins/*" "" +write_policy_and_role "monitoring" "monitoring" "monitoring-vault-sync" \ + "monitoring/* shared/postmark-relay harbor-pull/monitoring" "" +write_policy_and_role "logging" "logging" "logging-vault-sync" \ + "logging/* harbor-pull/logging" "" +write_policy_and_role "pegasus" "jellyfin" "pegasus-vault-sync" \ + "pegasus/* harbor-pull/jellyfin" "" +write_policy_and_role "crypto" "crypto" "crypto-vault-sync" \ + "crypto/* harbor-pull/crypto" "" +write_policy_and_role "health" "health" 
"health-vault-sync" \ + "health/*" "" +write_policy_and_role "finance" "finance" "finance-vault" \ + "finance/* shared/postmark-relay" "" +write_policy_and_role "finance-secrets" "finance" "finance-secrets-ensure" \ + "" \ + "finance/*" +write_policy_and_role "longhorn" "longhorn-system" "longhorn-vault,longhorn-vault-sync" \ + "longhorn/* harbor-pull/longhorn" "" +write_policy_and_role "postgres" "postgres" "postgres-vault" \ + "postgres/postgres-db" "" +write_policy_and_role "vault" "vault" "vault" \ + "vault/*" "" + +write_policy_and_role "sso-secrets" "sso" "mas-secrets-ensure" \ + "shared/keycloak-admin" \ + "harbor/harbor-oidc vault/vault-oidc-config comms/synapse-oidc logging/oauth2-proxy-logs-oidc finance/actual-oidc" +write_policy_and_role "crypto-secrets" "crypto" "crypto-secrets-ensure" \ + "" \ + "crypto/wallet-monero-temp-rpc-auth" +write_policy_and_role "comms-secrets" "comms" \ + "comms-secrets-ensure,mas-db-ensure,mas-admin-client-secret-writer,othrys-synapse-signingkey-job" \ + "" \ + "comms/turn-shared-secret comms/livekit-api comms/synapse-redis comms/synapse-macaroon comms/atlasbot-credentials-runtime comms/synapse-db comms/mas-db comms/mas-admin-client-runtime comms/mas-secrets-runtime comms/othrys-synapse-signingkey" diff --git a/services/vault/scripts/vault_oidc_configure.sh b/services/vault/scripts/vault_oidc_configure.sh new file mode 100644 index 0000000..70da3b7 --- /dev/null +++ b/services/vault/scripts/vault_oidc_configure.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env sh +set -eu + +log() { echo "[vault-oidc] $*"; } + +vault_cmd() { + for attempt in 1 2 3 4 5 6; do + set +e + output="$(vault "$@" 2>&1)" + status=$? 
+ set -e + if [ "${status}" -eq 0 ]; then + printf '%s' "${output}" + return 0 + fi + log "vault command failed; retrying (${attempt}/6)" + sleep $((attempt * 2)) + done + log "vault command failed; giving up" + return 1 +} + +ensure_token() { + if [ -n "${VAULT_TOKEN:-}" ]; then + return + fi + role="${VAULT_K8S_ROLE:-vault}" + jwt="$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" + if ! VAULT_TOKEN="$(vault_cmd write -field=token auth/kubernetes/login role="${role}" jwt="${jwt}")"; then + log "kubernetes auth login failed; set VAULT_TOKEN or fix role ${role}" + exit 1 + fi + export VAULT_TOKEN +} + +if ! status_json="$(vault_cmd status -format=json)"; then + log "vault status failed; check VAULT_ADDR and VAULT_TOKEN" + exit 1 +fi + +if ! printf '%s' "${status_json}" | grep -q '"initialized":[[:space:]]*true'; then + log "vault not initialized; skipping" + exit 0 +fi + +if printf '%s' "${status_json}" | grep -q '"sealed":[[:space:]]*true'; then + log "vault sealed; skipping" + exit 0 +fi + +ensure_token + +: "${VAULT_OIDC_DISCOVERY_URL:?set VAULT_OIDC_DISCOVERY_URL}" +: "${VAULT_OIDC_CLIENT_ID:?set VAULT_OIDC_CLIENT_ID}" +: "${VAULT_OIDC_CLIENT_SECRET:?set VAULT_OIDC_CLIENT_SECRET}" + +default_role="${VAULT_OIDC_DEFAULT_ROLE:-admin}" +scopes="${VAULT_OIDC_SCOPES:-openid profile email groups}" +user_claim="${VAULT_OIDC_USER_CLAIM:-preferred_username}" +groups_claim="${VAULT_OIDC_GROUPS_CLAIM:-groups}" +redirect_uris="${VAULT_OIDC_REDIRECT_URIS:-https://secret.bstein.dev/ui/vault/auth/oidc/oidc/callback}" +bound_audiences="${VAULT_OIDC_BOUND_AUDIENCES:-${VAULT_OIDC_CLIENT_ID}}" +bound_claims_type="${VAULT_OIDC_BOUND_CLAIMS_TYPE:-string}" +bound_claims_type="$(printf '%s' "${bound_claims_type}" | tr -d '[:space:]')" +if [ -z "${bound_claims_type}" ] || [ "${bound_claims_type}" = "" ]; then + bound_claims_type="string" +fi + +admin_group="${VAULT_OIDC_ADMIN_GROUP:-admin}" +admin_policies="${VAULT_OIDC_ADMIN_POLICIES:-default,vault-admin}" 
+dev_group="${VAULT_OIDC_DEV_GROUP:-dev}" +dev_policies="${VAULT_OIDC_DEV_POLICIES:-default,dev-kv}" +user_group="${VAULT_OIDC_USER_GROUP:-${dev_group}}" +user_policies="${VAULT_OIDC_USER_POLICIES:-${VAULT_OIDC_TOKEN_POLICIES:-${dev_policies}}}" + +if ! vault_cmd auth list -format=json | grep -q '"oidc/"'; then + log "enabling oidc auth method" + vault_cmd auth enable oidc +fi + +log "configuring oidc auth" +vault_cmd write auth/oidc/config \ + oidc_discovery_url="${VAULT_OIDC_DISCOVERY_URL}" \ + oidc_client_id="${VAULT_OIDC_CLIENT_ID}" \ + oidc_client_secret="${VAULT_OIDC_CLIENT_SECRET}" \ + default_role="${default_role}" + +vault_cmd auth tune -listing-visibility=unauth oidc >/dev/null + +build_bound_claims() { + claim="$1" + groups="$2" + json="{\"${claim}\":[" + first=1 + old_ifs=$IFS + IFS=, + for item in $groups; do + item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + if [ -z "${item}" ]; then + continue + fi + if [ "${first}" -eq 0 ]; then + json="${json}," + fi + json="${json}\"${item}\"" + first=0 + done + IFS=$old_ifs + json="${json}]}" + printf '%s' "${json}" +} + +build_json_array() { + items="$1" + json="[" + first=1 + old_ifs=$IFS + IFS=, + for item in $items; do + item="$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')" + if [ -z "${item}" ]; then + continue + fi + if [ "${first}" -eq 0 ]; then + json="${json}," + fi + json="${json}\"${item}\"" + first=0 + done + IFS=$old_ifs + json="${json}]" + printf '%s' "${json}" +} + +configure_role() { + role_name="$1" + role_groups="$2" + role_policies="$3" + if [ -z "${role_name}" ] || [ -z "${role_groups}" ] || [ -z "${role_policies}" ]; then + log "skipping role ${role_name} (missing groups or policies)" + return + fi + claims="$(build_bound_claims "${groups_claim}" "${role_groups}")" + scopes_csv="$(printf '%s' "${scopes}" | tr ' ' ',' | tr -s ',' | sed 's/^,//;s/,$//')" + redirect_json="$(build_json_array "${redirect_uris}")" + payload_file="$(mktemp)" + cat > 
"${payload_file}" <- + . /vault/secrets/vaultwarden-env.sh + && exec /start.sh env: - name: SIGNUPS_ALLOWED value: "false" @@ -29,30 +52,32 @@ spec: value: "true" - name: DOMAIN value: "https://vault.bstein.dev" + - name: DB_CONNECTION_RETRIES + value: "0" + - name: DATABASE_TIMEOUT + value: "60" + - name: DATABASE_MIN_CONNS + value: "2" + - name: DATABASE_MAX_CONNS + value: "20" + - name: DATABASE_IDLE_TIMEOUT + value: "600" - name: SMTP_HOST - value: "mailu-front.mailu-mailserver.svc.cluster.local" + value: "mail.bstein.dev" - name: SMTP_PORT - value: "25" + value: "587" - name: SMTP_SECURITY value: "starttls" - name: SMTP_ACCEPT_INVALID_HOSTNAMES - value: "true" + value: "false" - name: SMTP_ACCEPT_INVALID_CERTS - value: "true" + value: "false" + - name: SMTP_USERNAME + value: "no-reply-vaultwarden@bstein.dev" - name: SMTP_FROM - value: "postmaster@bstein.dev" + value: "no-reply-vaultwarden@bstein.dev" - name: SMTP_FROM_NAME - value: "Atlas Vaultwarden" - - name: DATABASE_URL - valueFrom: - secretKeyRef: - name: vaultwarden-db-url - key: DATABASE_URL - - name: ADMIN_TOKEN - valueFrom: - secretKeyRef: - name: vaultwarden-admin - key: ADMIN_TOKEN + value: "Vaultwarden" ports: - name: http containerPort: 80 diff --git a/services/vaultwarden/kustomization.yaml b/services/vaultwarden/kustomization.yaml index f0d02fd..c53cb1c 100644 --- a/services/vaultwarden/kustomization.yaml +++ b/services/vaultwarden/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization namespace: vaultwarden resources: - namespace.yaml + - serviceaccount.yaml - pvc.yaml - deployment.yaml - service.yaml diff --git a/services/vaultwarden/serviceaccount.yaml b/services/vaultwarden/serviceaccount.yaml new file mode 100644 index 0000000..445ee15 --- /dev/null +++ b/services/vaultwarden/serviceaccount.yaml @@ -0,0 +1,6 @@ +# services/vaultwarden/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vaultwarden-vault + namespace: vaultwarden