Merge pull request 'deploy' (#10) from deploy into main

Reviewed-on: #10
This commit is contained in:
bstein 2026-01-19 19:03:59 +00:00
commit c4ce7e3981
335 changed files with 16079 additions and 6354 deletions

1
.gitignore vendored
View File

@ -6,3 +6,4 @@ __pycache__/
*.py[cod] *.py[cod]
.pytest_cache .pytest_cache
.venv .venv
tmp/

53
ci/Jenkinsfile.titan-iac Normal file
View File

@ -0,0 +1,53 @@
// CI pipeline for titan-iac: runs the glue test suite inside an ephemeral
// Kubernetes agent pod, and on success force-pushes the tested commit to the
// 'deploy' branch (the branch Flux's GitRepository source tracks — see gotk-sync).
pipeline {
    agent {
        kubernetes {
            defaultContainer 'python'
            // Single-container pod spec; `command: cat` + tty keeps the
            // container alive so pipeline `sh` steps can exec into it.
            yaml """
apiVersion: v1
kind: Pod
spec:
  containers:
  - name: python
    image: python:3.12-slim
    command:
    - cat
    tty: true
"""
        }
    }
    environment {
        PIP_DISABLE_PIP_VERSION_CHECK = '1'
        PYTHONUNBUFFERED = '1'
        // Target branch for promotion after tests pass.
        DEPLOY_BRANCH = 'deploy'
    }
    stages {
        stage('Checkout') {
            steps {
                checkout scm
            }
        }
        stage('Install deps') {
            steps {
                sh 'pip install --no-cache-dir -r ci/requirements.txt'
            }
        }
        stage('Glue tests') {
            steps {
                sh 'pytest -q ci/tests/glue'
            }
        }
        stage('Promote') {
            steps {
                // PAT is injected as env vars; `set +x` prevents the token
                // from being echoed into the build log by the shell trace.
                withCredentials([usernamePassword(credentialsId: 'gitea-pat', usernameVariable: 'GIT_USER', passwordVariable: 'GIT_TOKEN')]) {
                    sh '''
                        set +x
                        git config user.email "jenkins@bstein.dev"
                        git config user.name "jenkins"
                        git remote set-url origin https://${GIT_USER}:${GIT_TOKEN}@scm.bstein.dev/bstein/titan-iac.git
                        git push origin HEAD:${DEPLOY_BRANCH}
                    '''
                }
            }
        }
    }
}

4
ci/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
pytest==8.3.4
kubernetes==30.1.0
PyYAML==6.0.2
requests==2.32.3

View File

@ -0,0 +1,7 @@
max_success_age_hours: 48
allow_suspended:
- comms/othrys-room-reset
- comms/pin-othrys-invite
- comms/seed-othrys-room
- finance/firefly-user-sync
- health/wger-user-sync

View File

@ -0,0 +1,46 @@
from __future__ import annotations
from datetime import datetime, timezone
from pathlib import Path
import yaml
from kubernetes import client, config
CONFIG_PATH = Path(__file__).with_name("config.yaml")
def _load_config() -> dict:
    """Parse the config.yaml sitting next to this module; an empty file yields {}."""
    raw = CONFIG_PATH.read_text(encoding="utf-8")
    parsed = yaml.safe_load(raw)
    return parsed if parsed else {}
def _load_kube() -> None:
    """Configure the Kubernetes client.

    Prefers the in-cluster service-account config (when running inside a pod);
    falls back to the local kubeconfig for developer/workstation runs.
    """
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config()
def test_glue_cronjobs_recent_success():
    """Every glue-labeled CronJob must have succeeded within the allowed window.

    Suspended CronJobs are tolerated only when explicitly listed in the
    ``allow_suspended`` config key.
    """
    cfg = _load_config()
    limit_hours = int(cfg.get("max_success_age_hours", 48))
    suspension_allowlist = set(cfg.get("allow_suspended", []))
    _load_kube()
    api = client.BatchV1Api()
    jobs = api.list_cron_job_for_all_namespaces(
        label_selector="atlas.bstein.dev/glue=true"
    ).items
    assert jobs, "No glue cronjobs found with atlas.bstein.dev/glue=true"
    now = datetime.now(timezone.utc)
    for cj in jobs:
        full_name = f"{cj.metadata.namespace}/{cj.metadata.name}"
        if cj.spec.suspend:
            # Suspended jobs never refresh lastSuccessfulTime; require an opt-in.
            assert full_name in suspension_allowlist, f"{full_name} is suspended but not in allow_suspended"
            continue
        success_at = cj.status.last_successful_time
        assert success_at is not None, f"{full_name} has no lastSuccessfulTime"
        hours_since = (now - success_at).total_seconds() / 3600
        assert hours_since <= limit_hours, f"{full_name} last success {hours_since:.1f}h ago"

View File

@ -0,0 +1,29 @@
from __future__ import annotations
import os
import requests
VM_URL = os.environ.get("VM_URL", "http://victoria-metrics-single-server:8428").rstrip("/")
def _query(promql: str) -> list[dict]:
    """Run an instant PromQL query against VictoriaMetrics, returning the result list."""
    endpoint = f"{VM_URL}/api/v1/query"
    resp = requests.get(endpoint, params={"query": promql}, timeout=10)
    resp.raise_for_status()
    body = resp.json()
    data_section = body.get("data", {})
    return data_section.get("result", [])
def test_glue_metrics_present():
    """kube-state-metrics must be exporting label series for glue cronjobs."""
    promql = 'kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
    result = _query(promql)
    assert result, "No glue cronjob label series found"
def test_glue_metrics_success_join():
    """Last-success timestamps must join onto the glue-labeled cronjob series."""
    promql = (
        "kube_cronjob_status_last_successful_time "
        'and on(namespace,cronjob) kube_cronjob_labels{label_atlas_bstein_dev_glue="true"}'
    )
    matches = _query(promql)
    assert matches, "No glue cronjob last success series found"

View File

@ -1,13 +0,0 @@
# clusters/atlas/applications/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../services/crypto
- ../../services/gitea
- ../../services/jellyfin
- ../../services/comms
- ../../services/monitoring
- ../../services/logging
- ../../services/pegasus
- ../../services/vault
- ../../services/bstein-dev-home

View File

@ -13,14 +13,14 @@ spec:
git: git:
checkout: checkout:
ref: ref:
branch: main branch: feature/vault-consumption
commit: commit:
author: author:
email: ops@bstein.dev email: ops@bstein.dev
name: flux-bot name: flux-bot
messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}" messageTemplate: "chore(bstein-dev-home): update images to {{range .Updated.Images}}{{.}}{{end}}"
push: push:
branch: main branch: feature/vault-consumption
update: update:
strategy: Setters strategy: Setters
path: services/bstein-dev-home path: services/bstein-dev-home

View File

@ -1,4 +1,4 @@
# clusters/atlas/flux-system/applications/communication/kustomization.yaml # clusters/atlas/flux-system/applications/comms/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1 apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization kind: Kustomization
metadata: metadata:

View File

@ -0,0 +1,24 @@
# clusters/atlas/flux-system/applications/finance/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: finance
namespace: flux-system
spec:
interval: 10m
path: ./services/finance
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: finance
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: actual-budget
namespace: finance
- apiVersion: apps/v1
kind: Deployment
name: firefly
namespace: finance
wait: false

View File

@ -0,0 +1,25 @@
# clusters/atlas/flux-system/applications/health/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: health
namespace: flux-system
spec:
interval: 10m
path: ./services/health
prune: true
sourceRef:
kind: GitRepository
name: flux-system
targetNamespace: health
dependsOn:
- name: keycloak
- name: postgres
- name: traefik
- name: vault
healthChecks:
- apiVersion: apps/v1
kind: Deployment
name: wger
namespace: health
wait: false

View File

@ -16,6 +16,7 @@ resources:
- harbor/image-automation.yaml - harbor/image-automation.yaml
- jellyfin/kustomization.yaml - jellyfin/kustomization.yaml
- xmr-miner/kustomization.yaml - xmr-miner/kustomization.yaml
- wallet-monero-temp/kustomization.yaml
- sui-metrics/kustomization.yaml - sui-metrics/kustomization.yaml
- openldap/kustomization.yaml - openldap/kustomization.yaml
- keycloak/kustomization.yaml - keycloak/kustomization.yaml
@ -27,3 +28,5 @@ resources:
- nextcloud-mail-sync/kustomization.yaml - nextcloud-mail-sync/kustomization.yaml
- outline/kustomization.yaml - outline/kustomization.yaml
- planka/kustomization.yaml - planka/kustomization.yaml
- finance/kustomization.yaml
- health/kustomization.yaml

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/applications/wallet-monero-temp/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: wallet-monero-temp
namespace: flux-system
spec:
interval: 10m
path: ./services/crypto/wallet-monero-temp
targetNamespace: crypto
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
dependsOn:
- name: crypto
- name: xmr-miner
wait: true

View File

@ -1,3 +1,4 @@
# clusters/atlas/flux-system/gotk-components.yaml
--- ---
# This manifest was generated by flux. DO NOT EDIT. # This manifest was generated by flux. DO NOT EDIT.
# Flux Version: v2.7.5 # Flux Version: v2.7.5

View File

@ -1,3 +1,4 @@
# clusters/atlas/flux-system/gotk-sync.yaml
# This manifest was generated by flux. DO NOT EDIT. # This manifest was generated by flux. DO NOT EDIT.
--- ---
apiVersion: source.toolkit.fluxcd.io/v1 apiVersion: source.toolkit.fluxcd.io/v1
@ -8,7 +9,7 @@ metadata:
spec: spec:
interval: 1m0s interval: 1m0s
ref: ref:
branch: main branch: deploy
secretRef: secretRef:
name: flux-system-gitea name: flux-system-gitea
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git

View File

@ -0,0 +1,17 @@
# clusters/atlas/flux-system/platform/cert-manager-cleanup/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cert-manager-cleanup
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/cert-manager/cleanup
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: cert-manager
wait: true

View File

@ -0,0 +1,19 @@
# clusters/atlas/flux-system/platform/cert-manager/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: cert-manager
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/cert-manager
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: cert-manager
dependsOn:
- name: helm
wait: true

View File

@ -4,12 +4,16 @@ kind: Kustomization
resources: resources:
- core/kustomization.yaml - core/kustomization.yaml
- helm/kustomization.yaml - helm/kustomization.yaml
- cert-manager/kustomization.yaml
- metallb/kustomization.yaml - metallb/kustomization.yaml
- traefik/kustomization.yaml - traefik/kustomization.yaml
- gitops-ui/kustomization.yaml - gitops-ui/kustomization.yaml
- monitoring/kustomization.yaml - monitoring/kustomization.yaml
- logging/kustomization.yaml - logging/kustomization.yaml
- maintenance/kustomization.yaml - maintenance/kustomization.yaml
- longhorn-adopt/kustomization.yaml
- longhorn/kustomization.yaml
- longhorn-ui/kustomization.yaml - longhorn-ui/kustomization.yaml
- postgres/kustomization.yaml - postgres/kustomization.yaml
- ../platform/vault-csi/kustomization.yaml - ../platform/vault-csi/kustomization.yaml
- ../platform/vault-injector/kustomization.yaml

View File

@ -0,0 +1,17 @@
# clusters/atlas/flux-system/platform/longhorn-adopt/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: longhorn-adopt
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/longhorn/adopt
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: longhorn-system
wait: true

View File

@ -15,4 +15,5 @@ spec:
namespace: flux-system namespace: flux-system
dependsOn: dependsOn:
- name: core - name: core
- name: longhorn
wait: true wait: true

View File

@ -0,0 +1,20 @@
# clusters/atlas/flux-system/platform/longhorn/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: longhorn
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/longhorn/core
prune: true
force: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
targetNamespace: longhorn-system
dependsOn:
- name: helm
- name: longhorn-adopt
wait: false

View File

@ -0,0 +1,16 @@
# clusters/atlas/flux-system/platform/vault-injector/kustomization.yaml
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
name: vault-injector
namespace: flux-system
spec:
interval: 30m
path: ./infrastructure/vault-injector
targetNamespace: vault
prune: true
sourceRef:
kind: GitRepository
name: flux-system
namespace: flux-system
wait: true

View File

@ -1,8 +0,0 @@
# clusters/atlas/platform/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../../infrastructure/modules/base
- ../../../infrastructure/modules/profiles/atlas-ha
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
- ../../../infrastructure/metallb

View File

@ -0,0 +1,5 @@
# Minimal Python runner image with HTTP + Postgres client libs preinstalled.
FROM python:3.11-slim
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
RUN pip install --no-cache-dir requests psycopg2-binary

View File

@ -0,0 +1,9 @@
# Wrap harbor-core with the shared Vault entrypoint shim; the shim sources
# injected secrets, then chains to the image's stock entrypoint via CMD.
FROM registry.bstein.dev/infra/harbor-core:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
# Drop back to the unprivileged harbor user after installing the shim.
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/harbor/entrypoint.sh"]

View File

@ -0,0 +1,9 @@
# Wrap harbor-jobservice with the shared Vault entrypoint shim; the shim
# sources injected secrets, then chains to the image's stock entrypoint via CMD.
FROM registry.bstein.dev/infra/harbor-jobservice:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
# Drop back to the unprivileged harbor user after installing the shim.
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/harbor/entrypoint.sh"]

View File

@ -0,0 +1,9 @@
# Wrap harbor-registry with the shared Vault entrypoint shim.
# NOTE(review): this image chains to /home/harbor/entrypoint.sh while the
# core/jobservice variants use /harbor/entrypoint.sh — presumably the upstream
# registry image lays its entrypoint out differently; confirm against the base image.
FROM registry.bstein.dev/infra/harbor-registry:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/home/harbor/entrypoint.sh"]

View File

@ -0,0 +1,9 @@
# Wrap harbor-registryctl with the shared Vault entrypoint shim; the shim
# sources injected secrets, then chains to the image's stock start script.
FROM registry.bstein.dev/infra/harbor-registryctl:v2.14.1-arm64
USER root
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
# Drop back to the unprivileged harbor user after installing the shim.
USER harbor
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/home/harbor/start.sh"]

View File

@ -0,0 +1,10 @@
# Rebase the static lk-jwt-service binary onto alpine so the shell-based
# Vault entrypoint shim can run before it (the upstream image has no shell).
FROM ghcr.io/element-hq/lk-jwt-service:0.3.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /lk-jwt-service /lk-jwt-service
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/lk-jwt-service"]

View File

@ -0,0 +1,10 @@
# Rebase the static oauth2-proxy binary onto alpine so the shell-based
# Vault entrypoint shim can run before it (the upstream image has no shell).
FROM quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /bin/oauth2-proxy /bin/oauth2-proxy
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/bin/oauth2-proxy"]

View File

@ -0,0 +1,10 @@
# Rebase the pegasus binary onto alpine and front it with the shell-based
# Vault entrypoint shim (assumes /pegasus is a self-contained static binary —
# TODO confirm against the source image).
FROM registry.bstein.dev/streaming/pegasus:1.2.32 AS base
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
COPY --from=base /pegasus /pegasus
COPY dockerfiles/vault-entrypoint.sh /entrypoint.sh
RUN chmod 0755 /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["/pegasus"]

View File

@ -0,0 +1,34 @@
#!/bin/sh
# Generic entrypoint shim for Vault-consuming containers:
#   1) optionally source an env file (presumably rendered by Vault tooling)
#   2) optionally copy rendered secret files to their expected destinations
#   3) exec the image's real command ("$@", supplied by the Dockerfile CMD)
set -eu

# VAULT_ENV_FILE: path to a shell env file to source before starting the app.
# A configured-but-missing file is a hard error (fail fast rather than start
# without secrets).
if [ -n "${VAULT_ENV_FILE:-}" ]; then
  if [ -f "${VAULT_ENV_FILE}" ]; then
    # shellcheck disable=SC1090
    . "${VAULT_ENV_FILE}"
  else
    echo "Vault env file not found: ${VAULT_ENV_FILE}" >&2
    exit 1
  fi
fi

# VAULT_COPY_FILES: comma-separated "src:dest" pairs.
if [ -n "${VAULT_COPY_FILES:-}" ]; then
  old_ifs="$IFS"
  IFS=','
  for pair in ${VAULT_COPY_FILES}; do
    # src is everything before the first ':'; dest is everything after it
    # (so dest itself may contain ':').
    src="${pair%%:*}"
    dest="${pair#*:}"
    if [ -z "${src}" ] || [ -z "${dest}" ]; then
      echo "Vault copy entry malformed: ${pair}" >&2
      exit 1
    fi
    if [ ! -f "${src}" ]; then
      echo "Vault file not found: ${src}" >&2
      exit 1
    fi
    mkdir -p "$(dirname "${dest}")"
    cp "${src}" "${dest}"
  done
  IFS="$old_ifs"
fi

# Replace this shell with the container's original command.
exec "$@"

View File

@ -0,0 +1,40 @@
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: cert-manager-cleanup-2
namespace: cert-manager
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: cert-manager-cleanup
restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
containers:
- name: cleanup
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/usr/bin/env", "bash"]
args: ["/scripts/cert_manager_cleanup.sh"]
volumeMounts:
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: script
configMap:
name: cert-manager-cleanup-script
defaultMode: 0555

View File

@ -0,0 +1,58 @@
# infrastructure/cert-manager/cleanup/cert-manager-cleanup-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: cert-manager-cleanup
namespace: cert-manager
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cert-manager-cleanup
rules:
- apiGroups: [""]
resources:
- pods
- services
- endpoints
- configmaps
- secrets
- serviceaccounts
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["apps"]
resources:
- deployments
- daemonsets
- statefulsets
- replicasets
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["batch"]
resources:
- jobs
- cronjobs
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["rbac.authorization.k8s.io"]
resources:
- roles
- rolebindings
- clusterroles
- clusterrolebindings
verbs: ["get", "list", "watch", "delete"]
- apiGroups: ["admissionregistration.k8s.io"]
resources:
- validatingwebhookconfigurations
- mutatingwebhookconfigurations
verbs: ["get", "list", "watch", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cert-manager-cleanup
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cert-manager-cleanup
subjects:
- kind: ServiceAccount
name: cert-manager-cleanup
namespace: cert-manager

View File

@ -0,0 +1,15 @@
# infrastructure/cert-manager/cleanup/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- cert-manager-cleanup-rbac.yaml
- cert-manager-cleanup-job.yaml
configMapGenerator:
- name: cert-manager-cleanup-script
namespace: cert-manager
files:
- cert_manager_cleanup.sh=scripts/cert_manager_cleanup.sh
options:
disableNameSuffixHash: true

View File

@ -0,0 +1,5 @@
# infrastructure/cert-manager/cleanup/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash
# Best-effort removal of leftover cert-manager objects (namespaced and
# cluster-scoped) matching any of the known install selectors. Every delete is
# fire-and-forget: --ignore-not-found and --wait=false keep the job from
# failing or blocking on resources that are already gone.
set -euo pipefail

namespace="cert-manager"
label_selectors=(
  "app.kubernetes.io/name=cert-manager"
  "app.kubernetes.io/instance=cert-manager"
  "app.kubernetes.io/instance=certmanager-prod"
)

# Delete the namespaced resource groups matching one selector.
delete_namespaced() {
  local sel="$1"
  local group
  for group in \
    "deployment,daemonset,statefulset,replicaset" \
    "pod,service,endpoints,serviceaccount,configmap,secret" \
    "role,rolebinding" \
    "job,cronjob"; do
    kubectl -n "${namespace}" delete "${group}" \
      --selector "${sel}" --ignore-not-found --wait=false
  done
}

# Delete the cluster-scoped resource groups matching one selector.
delete_cluster_scoped() {
  local sel="$1"
  local group
  for group in \
    "clusterrole,clusterrolebinding" \
    "mutatingwebhookconfiguration,validatingwebhookconfiguration"; do
    kubectl delete "${group}" --selector "${sel}" --ignore-not-found --wait=false
  done
}

for sel in "${label_selectors[@]}"; do
  delete_namespaced "${sel}"
  delete_cluster_scoped "${sel}"
done

# The webhook configurations may not carry the selectors; delete by name too.
kubectl delete mutatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false
kubectl delete validatingwebhookconfiguration cert-manager-webhook --ignore-not-found --wait=false

View File

@ -0,0 +1,67 @@
# infrastructure/cert-manager/helmrelease.yaml
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: cert-manager
namespace: cert-manager
spec:
interval: 30m
chart:
spec:
chart: cert-manager
version: v1.17.0
sourceRef:
kind: HelmRepository
name: jetstack
namespace: flux-system
install:
crds: CreateReplace
remediation: { retries: 3 }
timeout: 10m
upgrade:
crds: CreateReplace
remediation:
retries: 3
remediateLastFailure: true
cleanupOnFail: true
timeout: 10m
values:
installCRDs: true
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
webhook:
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
cainjector:
nodeSelector:
node-role.kubernetes.io/worker: "true"
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4

View File

@ -0,0 +1,6 @@
# infrastructure/cert-manager/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- helmrelease.yaml

View File

@ -0,0 +1,5 @@
# infrastructure/cert-manager/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: cert-manager

View File

@ -0,0 +1,44 @@
# infrastructure/core/coredns-custom.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: coredns-custom
namespace: kube-system
data:
bstein-dev.server: |
bstein.dev:53 {
errors
cache 30
hosts {
192.168.22.9 alerts.bstein.dev
192.168.22.9 auth.bstein.dev
192.168.22.9 bstein.dev
10.43.6.87 budget.bstein.dev
192.168.22.9 call.live.bstein.dev
192.168.22.9 cd.bstein.dev
192.168.22.9 chat.ai.bstein.dev
192.168.22.9 ci.bstein.dev
192.168.22.9 cloud.bstein.dev
192.168.22.9 health.bstein.dev
192.168.22.9 kit.live.bstein.dev
192.168.22.9 live.bstein.dev
192.168.22.9 logs.bstein.dev
192.168.22.9 longhorn.bstein.dev
192.168.22.4 mail.bstein.dev
192.168.22.9 matrix.live.bstein.dev
192.168.22.9 metrics.bstein.dev
192.168.22.9 monero.bstein.dev
10.43.6.87 money.bstein.dev
192.168.22.9 notes.bstein.dev
192.168.22.9 office.bstein.dev
192.168.22.9 pegasus.bstein.dev
192.168.22.9 registry.bstein.dev
192.168.22.9 scm.bstein.dev
192.168.22.9 secret.bstein.dev
192.168.22.9 sso.bstein.dev
192.168.22.9 stream.bstein.dev
192.168.22.9 tasks.bstein.dev
192.168.22.9 vault.bstein.dev
fallthrough
}
}

View File

@ -0,0 +1,141 @@
# infrastructure/core/coredns-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: coredns
namespace: kube-system
labels:
k8s-app: kube-dns
kubernetes.io/name: CoreDNS
spec:
progressDeadlineSeconds: 600
replicas: 2
revisionHistoryLimit: 0
selector:
matchLabels:
k8s-app: kube-dns
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 25%
maxUnavailable: 1
template:
metadata:
labels:
k8s-app: kube-dns
spec:
containers:
- name: coredns
image: registry.bstein.dev/infra/coredns:1.12.1
imagePullPolicy: IfNotPresent
args:
- -conf
- /etc/coredns/Corefile
ports:
- containerPort: 53
name: dns
protocol: UDP
- containerPort: 53
name: dns-tcp
protocol: TCP
- containerPort: 9153
name: metrics
protocol: TCP
livenessProbe:
httpGet:
path: /health
port: 8080
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 10
timeoutSeconds: 1
successThreshold: 1
failureThreshold: 3
readinessProbe:
httpGet:
path: /ready
port: 8181
scheme: HTTP
periodSeconds: 2
timeoutSeconds: 1
successThreshold: 1
failureThreshold: 3
resources:
limits:
memory: 170Mi
requests:
cpu: 100m
memory: 70Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
add:
- NET_BIND_SERVICE
drop:
- all
readOnlyRootFilesystem: true
volumeMounts:
- name: config-volume
mountPath: /etc/coredns
readOnly: true
- name: custom-config-volume
mountPath: /etc/coredns/custom
readOnly: true
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi5
- rpi4
- key: node-role.kubernetes.io/worker
operator: In
values:
- "true"
dnsPolicy: Default
nodeSelector:
kubernetes.io/os: linux
priorityClassName: system-cluster-critical
restartPolicy: Always
schedulerName: default-scheduler
serviceAccountName: coredns
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
topologySpreadConstraints:
- maxSkew: 1
topologyKey: kubernetes.io/hostname
whenUnsatisfiable: DoNotSchedule
labelSelector:
matchLabels:
k8s-app: kube-dns
- maxSkew: 1
topologyKey: topology.kubernetes.io/zone
whenUnsatisfiable: ScheduleAnyway
labelSelector:
matchLabels:
k8s-app: kube-dns
volumes:
- name: config-volume
configMap:
name: coredns
defaultMode: 420
items:
- key: Corefile
path: Corefile
- key: NodeHosts
path: NodeHosts
- name: custom-config-volume
configMap:
name: coredns-custom
optional: true
defaultMode: 420

View File

@ -4,5 +4,7 @@ kind: Kustomization
resources: resources:
- ../modules/base - ../modules/base
- ../modules/profiles/atlas-ha - ../modules/profiles/atlas-ha
- coredns-custom.yaml
- coredns-deployment.yaml
- ../sources/cert-manager/letsencrypt.yaml - ../sources/cert-manager/letsencrypt.yaml
- ../sources/cert-manager/letsencrypt-prod.yaml - ../sources/cert-manager/letsencrypt-prod.yaml

View File

@ -0,0 +1,15 @@
# infrastructure/longhorn/adopt/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- longhorn-adopt-rbac.yaml
- longhorn-helm-adopt-job.yaml
configMapGenerator:
- name: longhorn-helm-adopt-script
namespace: longhorn-system
files:
- longhorn_helm_adopt.sh=scripts/longhorn_helm_adopt.sh
options:
disableNameSuffixHash: true

View File

@ -0,0 +1,56 @@
# infrastructure/longhorn/adopt/longhorn-adopt-rbac.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: longhorn-helm-adopt
namespace: longhorn-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: longhorn-helm-adopt
rules:
- apiGroups: [""]
resources:
- configmaps
- services
- serviceaccounts
- secrets
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: ["apps"]
resources:
- deployments
- daemonsets
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: ["batch"]
resources:
- jobs
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: ["rbac.authorization.k8s.io"]
resources:
- roles
- rolebindings
- clusterroles
- clusterrolebindings
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: ["apiextensions.k8s.io"]
resources:
- customresourcedefinitions
verbs: ["get", "list", "watch", "patch", "update"]
- apiGroups: ["scheduling.k8s.io"]
resources:
- priorityclasses
verbs: ["get", "list", "watch", "patch", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: longhorn-helm-adopt
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: longhorn-helm-adopt
subjects:
- kind: ServiceAccount
name: longhorn-helm-adopt
namespace: longhorn-system

View File

@ -0,0 +1,40 @@
# infrastructure/longhorn/adopt/longhorn-helm-adopt-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-helm-adopt-2
namespace: longhorn-system
spec:
backoffLimit: 1
template:
spec:
serviceAccountName: longhorn-helm-adopt
restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
containers:
- name: adopt
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/usr/bin/env", "bash"]
args: ["/scripts/longhorn_helm_adopt.sh"]
volumeMounts:
- name: script
mountPath: /scripts
readOnly: true
volumes:
- name: script
configMap:
name: longhorn-helm-adopt-script
defaultMode: 0555

View File

@ -0,0 +1,5 @@
# infrastructure/longhorn/adopt/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: longhorn-system

View File

@ -0,0 +1,52 @@
#!/usr/bin/env bash
# Stamp Helm ownership metadata (release annotations + managed-by label) onto
# pre-existing Longhorn objects so a fresh HelmRelease can adopt them instead
# of failing with "invalid ownership metadata". All kubectl calls are
# best-effort (|| true) since many kinds may have no matching objects.
set -euo pipefail

release_name="longhorn"
release_namespace="longhorn-system"
selector="app.kubernetes.io/instance=${release_name}"

# Apply the Helm adoption annotations/label to every object of one kind.
# scope=namespaced targets the release namespace; anything else is cluster-wide.
annotate_and_label() {
  local scope="$1"
  local kind="$2"
  local base=(kubectl)
  if [ "${scope}" = "namespaced" ]; then
    base=(kubectl -n "${release_namespace}")
  fi
  "${base[@]}" annotate "${kind}" -l "${selector}" \
    meta.helm.sh/release-name="${release_name}" \
    meta.helm.sh/release-namespace="${release_namespace}" \
    --overwrite >/dev/null 2>&1 || true
  "${base[@]}" label "${kind}" -l "${selector}" \
    app.kubernetes.io/managed-by=Helm --overwrite >/dev/null 2>&1 || true
}

namespaced_kinds=(
  configmap
  service
  serviceaccount
  deployment
  daemonset
  job
  role
  rolebinding
)
cluster_kinds=(
  clusterrole
  clusterrolebinding
  customresourcedefinition
  priorityclass
)

for kind in "${namespaced_kinds[@]}"; do
  annotate_and_label "namespaced" "${kind}"
done
for kind in "${cluster_kinds[@]}"; do
  annotate_and_label "cluster" "${kind}"
done

View File

@ -0,0 +1,80 @@
# infrastructure/longhorn/core/helmrelease.yaml
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
name: longhorn
namespace: longhorn-system
spec:
interval: 30m
chart:
spec:
chart: longhorn
version: 1.8.2
sourceRef:
kind: HelmRepository
name: longhorn
namespace: flux-system
install:
crds: Skip
remediation: { retries: 3 }
timeout: 15m
upgrade:
crds: Skip
remediation:
retries: 3
remediateLastFailure: true
cleanupOnFail: true
timeout: 15m
values:
service:
ui:
type: NodePort
nodePort: 30824
privateRegistry:
createSecret: false
registrySecret: longhorn-registry
image:
pullPolicy: Always
longhorn:
engine:
repository: registry.bstein.dev/infra/longhorn-engine
tag: v1.8.2
manager:
repository: registry.bstein.dev/infra/longhorn-manager
tag: v1.8.2
ui:
repository: registry.bstein.dev/infra/longhorn-ui
tag: v1.8.2
instanceManager:
repository: registry.bstein.dev/infra/longhorn-instance-manager
tag: v1.8.2
shareManager:
repository: registry.bstein.dev/infra/longhorn-share-manager
tag: v1.8.2
backingImageManager:
repository: registry.bstein.dev/infra/longhorn-backing-image-manager
tag: v1.8.2
supportBundleKit:
repository: registry.bstein.dev/infra/longhorn-support-bundle-kit
tag: v0.0.56
csi:
attacher:
repository: registry.bstein.dev/infra/longhorn-csi-attacher
tag: v4.9.0
provisioner:
repository: registry.bstein.dev/infra/longhorn-csi-provisioner
tag: v5.3.0
nodeDriverRegistrar:
repository: registry.bstein.dev/infra/longhorn-csi-node-driver-registrar
tag: v2.14.0
resizer:
repository: registry.bstein.dev/infra/longhorn-csi-resizer
tag: v1.13.2
snapshotter:
repository: registry.bstein.dev/infra/longhorn-csi-snapshotter
tag: v8.2.0
livenessProbe:
repository: registry.bstein.dev/infra/longhorn-livenessprobe
tag: v2.16.0
defaultSettings:
systemManagedPodsImagePullPolicy: Always

View File

@ -0,0 +1,18 @@
# infrastructure/longhorn/core/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- namespace.yaml
- vault-serviceaccount.yaml
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- helmrelease.yaml
- longhorn-settings-ensure-job.yaml
configMapGenerator:
- name: longhorn-settings-ensure-script
files:
- longhorn_settings_ensure.sh=scripts/longhorn_settings_ensure.sh
generatorOptions:
disableNameSuffixHash: true

View File

@ -0,0 +1,36 @@
# infrastructure/longhorn/core/longhorn-settings-ensure-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: longhorn-settings-ensure-4
namespace: longhorn-system
spec:
backoffLimit: 0
ttlSecondsAfterFinished: 3600
template:
spec:
serviceAccountName: longhorn-service-account
restartPolicy: Never
volumes:
- name: longhorn-settings-ensure-script
configMap:
name: longhorn-settings-ensure-script
defaultMode: 0555
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
- key: node-role.kubernetes.io/worker
operator: Exists
containers:
- name: apply
image: bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
command: ["/scripts/longhorn_settings_ensure.sh"]
volumeMounts:
- name: longhorn-settings-ensure-script
mountPath: /scripts
readOnly: true

View File

@ -0,0 +1,5 @@
# infrastructure/longhorn/core/namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
name: longhorn-system

View File

@ -0,0 +1,42 @@
#!/usr/bin/env sh
set -eu
# Longhorn blocks direct CR patches for some settings; use the internal API instead.
# Cluster-local base URL for the Longhorn manager's settings endpoint.
api_base="http://longhorn-backend.longhorn-system.svc:9500/v1/settings"
wait_for_api() {
  # Poll the settings endpoint until it responds; give up after 30 tries
  # (2s apart, ~60s total) and report failure on stderr.
  remaining=30
  until curl -fsS "${api_base}" >/dev/null 2>&1; do
    remaining=$((remaining - 1))
    sleep 2
    if [ "${remaining}" -le 0 ]; then
      echo "Longhorn API not ready after retries." >&2
      return 1
    fi
  done
  return 0
}
update_setting() {
  # Idempotently apply one Longhorn setting via the REST API: skip the PUT
  # when the currently-reported value already matches the desired one.
  key="$1"
  want="$2"
  have="$(curl -fsS "${api_base}/${key}" || true)"
  if ! echo "${have}" | grep -Fq "\"value\":\"${want}\""; then
    echo "Setting ${key} -> ${want}"
    curl -fsS -X PUT \
      -H "Content-Type: application/json" \
      -d "{\"value\":\"${want}\"}" \
      "${api_base}/${key}" >/dev/null
  else
    echo "Setting ${key} already set."
  fi
}
wait_for_api
# Pin every image-related default setting to the mirrored registry copies.
while read -r setting image; do
  update_setting "${setting}" "${image}"
done <<'EOF'
default-engine-image registry.bstein.dev/infra/longhorn-engine:v1.8.2
default-instance-manager-image registry.bstein.dev/infra/longhorn-instance-manager:v1.8.2
default-backing-image-manager-image registry.bstein.dev/infra/longhorn-backing-image-manager:v1.8.2
support-bundle-manager-image registry.bstein.dev/infra/longhorn-support-bundle-kit:v0.0.56
EOF

View File

@ -0,0 +1,21 @@
# infrastructure/longhorn/core/secretproviderclass.yaml
# Pulls the Harbor registry credential out of Vault and mirrors it into a
# kubernetes.io/dockerconfigjson Secret whenever a pod mounts this class.
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
  name: longhorn-vault
  namespace: longhorn-system
spec:
  provider: vault
  parameters:
    vaultAddress: "http://vault.vault.svc.cluster.local:8200"
    roleName: "longhorn"
    objects: |
      - objectName: "harbor-pull__dockerconfigjson"
        secretPath: "kv/data/atlas/harbor-pull/longhorn"
        secretKey: "dockerconfigjson"
  # Sync the mounted object into a regular Secret for imagePullSecrets use.
  secretObjects:
    - secretName: longhorn-registry
      type: kubernetes.io/dockerconfigjson
      data:
        - objectName: harbor-pull__dockerconfigjson
          key: .dockerconfigjson

View File

@ -0,0 +1,6 @@
# infrastructure/longhorn/core/vault-serviceaccount.yaml
# Identity used by the vault-sync Deployment; bound to the Vault "longhorn"
# role via the Kubernetes auth method.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: longhorn-vault-sync
  namespace: longhorn-system

View File

@ -0,0 +1,45 @@
# infrastructure/longhorn/core/vault-sync-deployment.yaml
# Keeps the longhorn-vault SecretProviderClass mounted at all times so the
# secrets-store CSI driver continuously materializes/refreshes the
# longhorn-registry pull secret. The container itself does nothing but sleep.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: longhorn-vault-sync
  namespace: longhorn-system
spec:
  replicas: 1
  selector:
    matchLabels:
      app: longhorn-vault-sync
  template:
    metadata:
      labels:
        app: longhorn-vault-sync
    spec:
      serviceAccountName: longhorn-vault-sync
      nodeSelector:
        node-role.kubernetes.io/worker: "true"
      # Soft preference for the Raspberry Pi worker pool.
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 80
              preference:
                matchExpressions:
                  - key: hardware
                    operator: In
                    values: ["rpi5", "rpi4"]
      containers:
        - name: sync
          image: alpine:3.20
          command: ["/bin/sh", "-c"]
          args:
            - "sleep infinity"  # placeholder; the CSI mount does the real work
          volumeMounts:
            - name: vault-secrets
              mountPath: /vault/secrets
              readOnly: true
      volumes:
        - name: vault-secrets
          csi:
            driver: secrets-store.csi.k8s.io
            readOnly: true
            volumeAttributes:
              secretProviderClass: longhorn-vault

View File

@ -2,6 +2,7 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- serviceaccount.yaml
- oauth2-proxy-longhorn.yaml
- middleware.yaml - middleware.yaml
- ingress.yaml - ingress.yaml
- oauth2-proxy-longhorn.yaml

View File

@ -32,7 +32,18 @@ spec:
metadata: metadata:
labels: labels:
app: oauth2-proxy-longhorn app: oauth2-proxy-longhorn
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "longhorn"
vault.hashicorp.com/agent-inject-secret-oidc-config: "kv/data/atlas/longhorn/oauth2-proxy"
vault.hashicorp.com/agent-inject-template-oidc-config: |
{{- with secret "kv/data/atlas/longhorn/oauth2-proxy" -}}
client_id = "{{ .Data.data.client_id }}"
client_secret = "{{ .Data.data.client_secret }}"
cookie_secret = "{{ .Data.data.cookie_secret }}"
{{- end -}}
spec: spec:
serviceAccountName: longhorn-vault
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
affinity: affinity:
@ -50,6 +61,7 @@ spec:
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
args: args:
- --provider=oidc - --provider=oidc
- --config=/vault/secrets/oidc-config
- --redirect-url=https://longhorn.bstein.dev/oauth2/callback - --redirect-url=https://longhorn.bstein.dev/oauth2/callback
- --oidc-issuer-url=https://sso.bstein.dev/realms/atlas - --oidc-issuer-url=https://sso.bstein.dev/realms/atlas
- --scope=openid profile email groups - --scope=openid profile email groups
@ -69,22 +81,6 @@ spec:
- --skip-jwt-bearer-tokens=true - --skip-jwt-bearer-tokens=true
- --oidc-groups-claim=groups - --oidc-groups-claim=groups
- --cookie-domain=longhorn.bstein.dev - --cookie-domain=longhorn.bstein.dev
env:
- name: OAUTH2_PROXY_CLIENT_ID
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: client_id
- name: OAUTH2_PROXY_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: client_secret
- name: OAUTH2_PROXY_COOKIE_SECRET
valueFrom:
secretKeyRef:
name: oauth2-proxy-longhorn-oidc
key: cookie_secret
ports: ports:
- containerPort: 4180 - containerPort: 4180
name: http name: http

View File

@ -0,0 +1,6 @@
# infrastructure/longhorn/ui-ingress/serviceaccount.yaml
# Identity for the oauth2-proxy pod so the Vault agent injector can
# authenticate it against the "longhorn" Vault role.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: longhorn-vault
  namespace: longhorn-system

View File

@ -0,0 +1,47 @@
# infrastructure/metallb/helmrelease.yaml
# MetalLB installed via Flux HelmRelease (replaces the previous pre-rendered
# manifests plus strategic-merge patches).
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: metallb
  namespace: metallb-system
spec:
  interval: 30m
  chart:
    spec:
      chart: metallb
      version: 0.15.3
      sourceRef:
        kind: HelmRepository
        name: metallb
        namespace: flux-system
  install:
    crds: CreateReplace
    remediation: { retries: 3 }
    timeout: 10m
  upgrade:
    crds: CreateReplace
    remediation:
      retries: 3
      remediateLastFailure: true
    cleanupOnFail: true
    timeout: 10m
  values:
    # Only Services that request loadBalancerClass "metallb" are handled.
    loadBalancerClass: metallb
    prometheus:
      metricsPort: 7472
    controller:
      logLevel: info
      webhookMode: enabled
      tlsMinVersion: VersionTLS12
      # Pin the controller to the Raspberry Pi worker pool.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: hardware
                    operator: In
                    values:
                      - rpi4
                      - rpi5
    speaker:
      logLevel: info

View File

@ -3,8 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- namespace.yaml - namespace.yaml
- metallb-rendered.yaml - helmrelease.yaml
- ippool.yaml - ippool.yaml
patchesStrategicMerge:
- patches/node-placement.yaml
- patches/speaker-loglevel.yaml

File diff suppressed because it is too large Load Diff

View File

@ -1,27 +0,0 @@
# infrastructure/metallb/patches/node-placement.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: metallb-controller
namespace: metallb-system
spec:
template:
spec:
containers:
- name: controller
args:
- --port=7472
- --log-level=info
- --webhook-mode=enabled
- --tls-min-version=VersionTLS12
- --lb-class=metallb
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: hardware
operator: In
values:
- rpi4
- rpi5

View File

@ -1,15 +0,0 @@
# infrastructure/metallb/patches/speaker-loglevel.yaml
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: metallb-speaker
namespace: metallb-system
spec:
template:
spec:
containers:
- name: speaker
args:
- --port=7472
- --log-level=info
- --lb-class=metallb

View File

@ -0,0 +1,24 @@
# infrastructure/modules/base/storageclass/asteria-encrypted.yaml
# Longhorn-backed encrypted StorageClass. Per-volume encryption keys are read
# from a Secret named after the PVC, in the PVC's own namespace, via the
# Longhorn CSI template variables ${pvc.name}/${pvc.namespace}.
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: asteria-encrypted
parameters:
  diskSelector: asteria  # place replicas only on disks tagged "asteria"
  fromBackup: ""
  numberOfReplicas: "2"
  staleReplicaTimeout: "30"
  fsType: "ext4"
  replicaAutoBalance: "least-effort"
  dataLocality: "disabled"
  encrypted: "true"
  csi.storage.k8s.io/provisioner-secret-name: ${pvc.name}
  csi.storage.k8s.io/provisioner-secret-namespace: ${pvc.namespace}
  csi.storage.k8s.io/node-publish-secret-name: ${pvc.name}
  csi.storage.k8s.io/node-publish-secret-namespace: ${pvc.namespace}
  csi.storage.k8s.io/node-stage-secret-name: ${pvc.name}
  csi.storage.k8s.io/node-stage-secret-namespace: ${pvc.namespace}
provisioner: driver.longhorn.io
reclaimPolicy: Retain  # keep the Longhorn volume if the PVC is deleted
allowVolumeExpansion: true
volumeBindingMode: Immediate

View File

@ -3,4 +3,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- asteria.yaml - asteria.yaml
- asteria-encrypted.yaml
- astreae.yaml - astreae.yaml

View File

@ -11,5 +11,5 @@ spec:
roleName: "postgres" roleName: "postgres"
objects: | objects: |
- objectName: "postgres_password" - objectName: "postgres_password"
secretPath: "kv/data/postgres" secretPath: "kv/data/atlas/postgres/postgres-db"
secretKey: "POSTGRES_PASSWORD" secretKey: "POSTGRES_PASSWORD"

View File

@ -1,3 +1,4 @@
# infrastructure/sources/cert-manager/letsencrypt-prod.yaml
apiVersion: cert-manager.io/v1 apiVersion: cert-manager.io/v1
kind: ClusterIssuer kind: ClusterIssuer
metadata: metadata:

View File

@ -1,3 +1,4 @@
# infrastructure/sources/cert-manager/letsencrypt.yaml
apiVersion: cert-manager.io/v1 apiVersion: cert-manager.io/v1
kind: ClusterIssuer kind: ClusterIssuer
metadata: metadata:

View File

@ -0,0 +1,9 @@
# infrastructure/sources/helm/ananace.yaml
# Flux HelmRepository source for the ananace charts (matrix-synapse et al.).
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: ananace
  namespace: flux-system
spec:
  interval: 1h
  url: https://ananace.gitlab.io/charts

View File

@ -2,15 +2,18 @@
apiVersion: kustomize.config.k8s.io/v1beta1 apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization kind: Kustomization
resources: resources:
- ananace.yaml
- fluent-bit.yaml - fluent-bit.yaml
- grafana.yaml - grafana.yaml
- hashicorp.yaml - hashicorp.yaml
- jetstack.yaml - jetstack.yaml
- jenkins.yaml - jenkins.yaml
- mailu.yaml - mailu.yaml
- metallb.yaml
- opentelemetry.yaml - opentelemetry.yaml
- opensearch.yaml - opensearch.yaml
- harbor.yaml - harbor.yaml
- longhorn.yaml
- prometheus.yaml - prometheus.yaml
- victoria-metrics.yaml - victoria-metrics.yaml
- secrets-store-csi.yaml - secrets-store-csi.yaml

View File

@ -0,0 +1,9 @@
# infrastructure/sources/helm/longhorn.yaml
# Flux HelmRepository source for the upstream Longhorn chart.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: longhorn
  namespace: flux-system
spec:
  interval: 30m
  url: https://charts.longhorn.io

View File

@ -0,0 +1,9 @@
# infrastructure/sources/helm/metallb.yaml
# Flux HelmRepository source for the upstream MetalLB chart.
apiVersion: source.toolkit.fluxcd.io/v1
kind: HelmRepository
metadata:
  name: metallb
  namespace: flux-system
spec:
  interval: 1h
  url: https://metallb.github.io/metallb

File diff suppressed because it is too large Load Diff

View File

@ -27,6 +27,8 @@ items:
creationTimestamp: null creationTimestamp: null
labels: labels:
app: traefik app: traefik
app.kubernetes.io/instance: traefik-kube-system
app.kubernetes.io/name: traefik
spec: spec:
containers: containers:
- args: - args:

View File

@ -5,6 +5,7 @@ metadata:
name: traefik name: traefik
namespace: flux-system namespace: flux-system
resources: resources:
- crds.yaml
- deployment.yaml - deployment.yaml
- serviceaccount.yaml - serviceaccount.yaml
- clusterrole.yaml - clusterrole.yaml

View File

@ -3,9 +3,10 @@ apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: traefik name: traefik
namespace: kube-system namespace: traefik
annotations: annotations:
metallb.universe.tf/address-pool: communication-pool metallb.universe.tf/address-pool: communication-pool
metallb.universe.tf/allow-shared-ip: traefik
spec: spec:
type: LoadBalancer type: LoadBalancer
loadBalancerClass: metallb loadBalancerClass: metallb
@ -20,5 +21,4 @@ spec:
targetPort: websecure targetPort: websecure
protocol: TCP protocol: TCP
selector: selector:
app.kubernetes.io/instance: traefik-kube-system app: traefik
app.kubernetes.io/name: traefik

View File

@ -0,0 +1,43 @@
# infrastructure/vault-injector/helmrelease.yaml
# Deploys ONLY the Vault agent injector (mutating webhook) from the hashicorp
# vault chart; server and CSI provider are disabled and the injector talks to
# the existing Vault via externalVaultAddr.
apiVersion: helm.toolkit.fluxcd.io/v2
kind: HelmRelease
metadata:
  name: vault-injector
  namespace: vault
spec:
  interval: 30m
  chart:
    spec:
      chart: vault
      version: 0.31.0
      sourceRef:
        kind: HelmRepository
        name: hashicorp
        namespace: flux-system
  install:
    remediation: { retries: 3 }
    timeout: 10m
  upgrade:
    remediation:
      retries: 3
      remediateLastFailure: true
    cleanupOnFail: true
    timeout: 10m
  values:
    global:
      externalVaultAddr: http://vault.vault.svc.cluster.local:8200
      tlsDisable: true
    server:
      enabled: false  # Vault server is managed outside this release
    csi:
      enabled: false  # CSI provider is deployed separately
    injector:
      enabled: true
      replicas: 1
      agentImage:
        repository: hashicorp/vault
        tag: "1.17.6"
      webhook:
        # Ignore keeps pod admission working if the injector is down, at the
        # cost of silently skipping injection for pods created meanwhile.
        failurePolicy: Ignore
      nodeSelector:
        node-role.kubernetes.io/worker: "true"

View File

@ -0,0 +1,5 @@
# infrastructure/vault-injector/kustomization.yaml
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - helmrelease.yaml

View File

@ -1,8 +1,8 @@
{ {
"counts": { "counts": {
"helmrelease_host_hints": 7, "helmrelease_host_hints": 17,
"http_endpoints": 35, "http_endpoints": 37,
"services": 44, "services": 43,
"workloads": 49 "workloads": 54
} }
} }

View File

@ -12,12 +12,7 @@
"targetNamespace": "bstein-dev-home" "targetNamespace": "bstein-dev-home"
}, },
{ {
"name": "ci-demo", "name": "comms",
"path": "services/ci-demo",
"targetNamespace": null
},
{
"name": "communication",
"path": "services/comms", "path": "services/comms",
"targetNamespace": "comms" "targetNamespace": "comms"
}, },
@ -71,6 +66,11 @@
"path": "services/keycloak", "path": "services/keycloak",
"targetNamespace": "sso" "targetNamespace": "sso"
}, },
{
"name": "logging",
"path": "services/logging",
"targetNamespace": null
},
{ {
"name": "longhorn-ui", "name": "longhorn-ui",
"path": "infrastructure/longhorn/ui-ingress", "path": "infrastructure/longhorn/ui-ingress",
@ -81,6 +81,11 @@
"path": "services/mailu", "path": "services/mailu",
"targetNamespace": "mailu-mailserver" "targetNamespace": "mailu-mailserver"
}, },
{
"name": "maintenance",
"path": "services/maintenance",
"targetNamespace": null
},
{ {
"name": "metallb", "name": "metallb",
"path": "infrastructure/metallb", "path": "infrastructure/metallb",
@ -116,11 +121,26 @@
"path": "services/openldap", "path": "services/openldap",
"targetNamespace": "sso" "targetNamespace": "sso"
}, },
{
"name": "outline",
"path": "services/outline",
"targetNamespace": "outline"
},
{ {
"name": "pegasus", "name": "pegasus",
"path": "services/pegasus", "path": "services/pegasus",
"targetNamespace": "jellyfin" "targetNamespace": "jellyfin"
}, },
{
"name": "planka",
"path": "services/planka",
"targetNamespace": "planka"
},
{
"name": "postgres",
"path": "infrastructure/postgres",
"targetNamespace": "postgres"
},
{ {
"name": "sui-metrics", "name": "sui-metrics",
"path": "services/sui-metrics/overlays/atlas", "path": "services/sui-metrics/overlays/atlas",
@ -163,7 +183,7 @@
"serviceAccountName": null, "serviceAccountName": null,
"nodeSelector": {}, "nodeSelector": {},
"images": [ "images": [
"ollama/ollama:latest" "ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d"
] ]
}, },
{ {
@ -179,7 +199,7 @@
"node-role.kubernetes.io/worker": "true" "node-role.kubernetes.io/worker": "true"
}, },
"images": [ "images": [
"registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84" "registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92"
] ]
}, },
{ {
@ -195,7 +215,7 @@
"node-role.kubernetes.io/worker": "true" "node-role.kubernetes.io/worker": "true"
}, },
"images": [ "images": [
"registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84" "registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92"
] ]
}, },
{ {
@ -214,21 +234,6 @@
"python:3.11-slim" "python:3.11-slim"
] ]
}, },
{
"kind": "Deployment",
"namespace": "ci-demo",
"name": "ci-demo",
"labels": {
"app.kubernetes.io/name": "ci-demo"
},
"serviceAccountName": null,
"nodeSelector": {
"hardware": "rpi4"
},
"images": [
"registry.bstein.dev/infra/ci-demo:v0.0.0-3"
]
},
{ {
"kind": "Deployment", "kind": "Deployment",
"namespace": "comms", "namespace": "comms",
@ -271,7 +276,7 @@
"hardware": "rpi5" "hardware": "rpi5"
}, },
"images": [ "images": [
"ghcr.io/element-hq/element-call:latest" "ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc"
] ]
}, },
{ {
@ -345,56 +350,6 @@
"nginx:1.27-alpine" "nginx:1.27-alpine"
] ]
}, },
{
"kind": "Deployment",
"namespace": "comms",
"name": "othrys-element-element-web",
"labels": {
"app.kubernetes.io/instance": "othrys-element",
"app.kubernetes.io/name": "element-web"
},
"serviceAccountName": "othrys-element-element-web",
"nodeSelector": {
"hardware": "rpi5"
},
"images": [
"ghcr.io/element-hq/element-web:v1.12.6"
]
},
{
"kind": "Deployment",
"namespace": "comms",
"name": "othrys-synapse-matrix-synapse",
"labels": {
"app.kubernetes.io/component": "synapse",
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/name": "matrix-synapse"
},
"serviceAccountName": "default",
"nodeSelector": {
"hardware": "rpi5"
},
"images": [
"ghcr.io/element-hq/synapse:v1.144.0"
]
},
{
"kind": "Deployment",
"namespace": "comms",
"name": "othrys-synapse-redis-master",
"labels": {
"app.kubernetes.io/component": "master",
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/managed-by": "Helm",
"app.kubernetes.io/name": "redis",
"helm.sh/chart": "redis-17.17.1"
},
"serviceAccountName": "othrys-synapse-redis",
"nodeSelector": {},
"images": [
"docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34"
]
},
{ {
"kind": "DaemonSet", "kind": "DaemonSet",
"namespace": "crypto", "namespace": "crypto",
@ -407,7 +362,7 @@
"node-role.kubernetes.io/worker": "true" "node-role.kubernetes.io/worker": "true"
}, },
"images": [ "images": [
"ghcr.io/tari-project/xmrig:latest" "ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9"
] ]
}, },
{ {
@ -681,6 +636,66 @@
"hashicorp/vault-csi-provider:1.7.0" "hashicorp/vault-csi-provider:1.7.0"
] ]
}, },
{
"kind": "DaemonSet",
"namespace": "logging",
"name": "node-image-gc-rpi4",
"labels": {
"app": "node-image-gc-rpi4"
},
"serviceAccountName": "node-image-gc-rpi4",
"nodeSelector": {
"hardware": "rpi4"
},
"images": [
"bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131"
]
},
{
"kind": "DaemonSet",
"namespace": "logging",
"name": "node-image-prune-rpi5",
"labels": {
"app": "node-image-prune-rpi5"
},
"serviceAccountName": "node-image-prune-rpi5",
"nodeSelector": {
"hardware": "rpi5"
},
"images": [
"bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131"
]
},
{
"kind": "DaemonSet",
"namespace": "logging",
"name": "node-log-rotation",
"labels": {
"app": "node-log-rotation"
},
"serviceAccountName": "node-log-rotation",
"nodeSelector": {
"hardware": "rpi5"
},
"images": [
"bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131"
]
},
{
"kind": "Deployment",
"namespace": "logging",
"name": "oauth2-proxy-logs",
"labels": {
"app": "oauth2-proxy-logs"
},
"serviceAccountName": null,
"nodeSelector": {
"node-role.kubernetes.io/worker": "true"
},
"images": [
"quay.io/oauth2-proxy/oauth2-proxy:v7.6.0"
]
},
{ {
"kind": "Deployment", "kind": "Deployment",
"namespace": "longhorn-system", "namespace": "longhorn-system",
@ -708,7 +723,7 @@
"mailu.bstein.dev/vip": "true" "mailu.bstein.dev/vip": "true"
}, },
"images": [ "images": [
"lachlanevenson/k8s-kubectl:latest" "registry.bstein.dev/bstein/kubectl:1.35.0"
] ]
}, },
{ {
@ -726,37 +741,30 @@
}, },
{ {
"kind": "DaemonSet", "kind": "DaemonSet",
"namespace": "metallb-system", "namespace": "maintenance",
"name": "metallb-speaker", "name": "node-image-sweeper",
"labels": { "labels": {
"app.kubernetes.io/component": "speaker", "app": "node-image-sweeper"
"app.kubernetes.io/instance": "metallb",
"app.kubernetes.io/name": "metallb"
}, },
"serviceAccountName": "metallb-speaker", "serviceAccountName": "node-image-sweeper",
"nodeSelector": { "nodeSelector": {
"kubernetes.io/os": "linux" "kubernetes.io/os": "linux"
}, },
"images": [ "images": [
"quay.io/frrouting/frr:10.4.1", "python:3.12.9-alpine3.20"
"quay.io/metallb/speaker:v0.15.3"
] ]
}, },
{ {
"kind": "Deployment", "kind": "DaemonSet",
"namespace": "metallb-system", "namespace": "maintenance",
"name": "metallb-controller", "name": "node-nofile",
"labels": { "labels": {
"app.kubernetes.io/component": "controller", "app": "node-nofile"
"app.kubernetes.io/instance": "metallb",
"app.kubernetes.io/name": "metallb"
},
"serviceAccountName": "metallb-controller",
"nodeSelector": {
"kubernetes.io/os": "linux"
}, },
"serviceAccountName": "node-nofile",
"nodeSelector": {},
"images": [ "images": [
"quay.io/metallb/controller:v0.15.3" "bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131"
] ]
}, },
{ {
@ -772,6 +780,21 @@
"registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04" "registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04"
] ]
}, },
{
"kind": "DaemonSet",
"namespace": "monitoring",
"name": "jetson-tegrastats-exporter",
"labels": {
"app": "jetson-tegrastats-exporter"
},
"serviceAccountName": "default",
"nodeSelector": {
"jetson": "true"
},
"images": [
"python:3.10-slim"
]
},
{ {
"kind": "Deployment", "kind": "Deployment",
"namespace": "monitoring", "namespace": "monitoring",
@ -797,7 +820,7 @@
"hardware": "rpi5" "hardware": "rpi5"
}, },
"images": [ "images": [
"collabora/code:latest" "collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26"
] ]
}, },
{ {
@ -815,6 +838,66 @@
"nextcloud:29-apache" "nextcloud:29-apache"
] ]
}, },
{
"kind": "Deployment",
"namespace": "outline",
"name": "outline",
"labels": {
"app": "outline"
},
"serviceAccountName": null,
"nodeSelector": {
"node-role.kubernetes.io/worker": "true"
},
"images": [
"outlinewiki/outline:1.2.0"
]
},
{
"kind": "Deployment",
"namespace": "outline",
"name": "outline-redis",
"labels": {
"app": "outline-redis"
},
"serviceAccountName": null,
"nodeSelector": {
"node-role.kubernetes.io/worker": "true"
},
"images": [
"redis:7.4.1-alpine"
]
},
{
"kind": "Deployment",
"namespace": "planka",
"name": "planka",
"labels": {
"app": "planka"
},
"serviceAccountName": null,
"nodeSelector": {
"node-role.kubernetes.io/worker": "true"
},
"images": [
"ghcr.io/plankanban/planka:2.0.0-rc.4"
]
},
{
"kind": "StatefulSet",
"namespace": "postgres",
"name": "postgres",
"labels": {
"app": "postgres"
},
"serviceAccountName": "postgres-vault",
"nodeSelector": {
"node-role.kubernetes.io/worker": "true"
},
"images": [
"postgres:15"
]
},
{ {
"kind": "Deployment", "kind": "Deployment",
"namespace": "sso", "namespace": "sso",
@ -984,22 +1067,6 @@
} }
] ]
}, },
{
"namespace": "ci-demo",
"name": "ci-demo",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/name": "ci-demo"
},
"ports": [
{
"name": "http",
"port": 80,
"targetPort": "http",
"protocol": "TCP"
}
]
},
{ {
"namespace": "comms", "namespace": "comms",
"name": "coturn", "name": "coturn",
@ -1454,94 +1521,6 @@
} }
] ]
}, },
{
"namespace": "comms",
"name": "othrys-element-element-web",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/instance": "othrys-element",
"app.kubernetes.io/name": "element-web"
},
"ports": [
{
"name": "http",
"port": 80,
"targetPort": "http",
"protocol": "TCP"
}
]
},
{
"namespace": "comms",
"name": "othrys-synapse-matrix-synapse",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/component": "synapse",
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/name": "matrix-synapse"
},
"ports": [
{
"name": "http",
"port": 8008,
"targetPort": "http",
"protocol": "TCP"
}
]
},
{
"namespace": "comms",
"name": "othrys-synapse-redis-headless",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/name": "redis"
},
"ports": [
{
"name": "tcp-redis",
"port": 6379,
"targetPort": "redis",
"protocol": "TCP"
}
]
},
{
"namespace": "comms",
"name": "othrys-synapse-redis-master",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/component": "master",
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/name": "redis"
},
"ports": [
{
"name": "tcp-redis",
"port": 6379,
"targetPort": "redis",
"protocol": "TCP"
}
]
},
{
"namespace": "comms",
"name": "othrys-synapse-replication",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/component": "synapse",
"app.kubernetes.io/instance": "othrys-synapse",
"app.kubernetes.io/name": "matrix-synapse"
},
"ports": [
{
"name": "replication",
"port": 9093,
"targetPort": "replication",
"protocol": "TCP"
}
]
},
{ {
"namespace": "crypto", "namespace": "crypto",
"name": "monerod", "name": "monerod",
@ -1743,6 +1722,22 @@
} }
] ]
}, },
{
"namespace": "logging",
"name": "oauth2-proxy-logs",
"type": "ClusterIP",
"selector": {
"app": "oauth2-proxy-logs"
},
"ports": [
{
"name": "http",
"port": 80,
"targetPort": 4180,
"protocol": "TCP"
}
]
},
{ {
"namespace": "longhorn-system", "namespace": "longhorn-system",
"name": "oauth2-proxy-longhorn", "name": "oauth2-proxy-longhorn",
@ -1823,24 +1818,6 @@
} }
] ]
}, },
{
"namespace": "metallb-system",
"name": "metallb-webhook-service",
"type": "ClusterIP",
"selector": {
"app.kubernetes.io/component": "controller",
"app.kubernetes.io/instance": "metallb",
"app.kubernetes.io/name": "metallb"
},
"ports": [
{
"name": null,
"port": 443,
"targetPort": 9443,
"protocol": "TCP"
}
]
},
{ {
"namespace": "monitoring", "namespace": "monitoring",
"name": "dcgm-exporter", "name": "dcgm-exporter",
@ -1857,6 +1834,22 @@
} }
] ]
}, },
{
"namespace": "monitoring",
"name": "jetson-tegrastats-exporter",
"type": "ClusterIP",
"selector": {
"app": "jetson-tegrastats-exporter"
},
"ports": [
{
"name": "metrics",
"port": 9100,
"targetPort": "metrics",
"protocol": "TCP"
}
]
},
{ {
"namespace": "monitoring", "namespace": "monitoring",
"name": "postmark-exporter", "name": "postmark-exporter",
@ -1905,6 +1898,70 @@
} }
] ]
}, },
{
"namespace": "outline",
"name": "outline",
"type": "ClusterIP",
"selector": {
"app": "outline"
},
"ports": [
{
"name": "http",
"port": 80,
"targetPort": "http",
"protocol": "TCP"
}
]
},
{
"namespace": "outline",
"name": "outline-redis",
"type": "ClusterIP",
"selector": {
"app": "outline-redis"
},
"ports": [
{
"name": "redis",
"port": 6379,
"targetPort": "redis",
"protocol": "TCP"
}
]
},
{
"namespace": "planka",
"name": "planka",
"type": "ClusterIP",
"selector": {
"app": "planka"
},
"ports": [
{
"name": "http",
"port": 80,
"targetPort": "http",
"protocol": "TCP"
}
]
},
{
"namespace": "postgres",
"name": "postgres-service",
"type": "ClusterIP",
"selector": {
"app": "postgres"
},
"ports": [
{
"name": "postgres",
"port": 5432,
"targetPort": 5432,
"protocol": "TCP"
}
]
},
{ {
"namespace": "sso", "namespace": "sso",
"name": "keycloak", "name": "keycloak",
@ -2110,7 +2167,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown-bstein-dev", "name": "matrix-wellknown-bstein-dev",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2130,7 +2187,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown-bstein-dev", "name": "matrix-wellknown-bstein-dev",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2170,7 +2227,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "element-call", "name": "element-call",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2250,7 +2307,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "livekit-jwt-ingress", "name": "livekit-jwt-ingress",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2270,27 +2327,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "livekit-ingress", "name": "livekit-ingress",
"source": "communication" "source": "comms"
}
},
{
"host": "live.bstein.dev",
"path": "/",
"backend": {
"namespace": "comms",
"service": "othrys-element-element-web",
"port": 80,
"workloads": [
{
"kind": "Deployment",
"name": "othrys-element-element-web"
}
]
},
"via": {
"kind": "Ingress",
"name": "othrys-element-element-web",
"source": "communication"
} }
}, },
{ {
@ -2310,7 +2347,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown", "name": "matrix-wellknown",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2330,7 +2367,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown", "name": "matrix-wellknown",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2340,17 +2377,32 @@
"namespace": "comms", "namespace": "comms",
"service": "othrys-synapse-matrix-synapse", "service": "othrys-synapse-matrix-synapse",
"port": 8008, "port": 8008,
"workloads": []
},
"via": {
"kind": "Ingress",
"name": "matrix-routing",
"source": "comms"
}
},
{
"host": "logs.bstein.dev",
"path": "/",
"backend": {
"namespace": "logging",
"service": "oauth2-proxy-logs",
"port": "http",
"workloads": [ "workloads": [
{ {
"kind": "Deployment", "kind": "Deployment",
"name": "othrys-synapse-matrix-synapse" "name": "oauth2-proxy-logs"
} }
] ]
}, },
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "logs",
"source": "communication" "source": "logging"
} }
}, },
{ {
@ -2405,7 +2457,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2425,7 +2477,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown-matrix-live", "name": "matrix-wellknown-matrix-live",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2445,7 +2497,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-wellknown-matrix-live", "name": "matrix-wellknown-matrix-live",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2455,17 +2507,12 @@
"namespace": "comms", "namespace": "comms",
"service": "othrys-synapse-matrix-synapse", "service": "othrys-synapse-matrix-synapse",
"port": 8008, "port": 8008,
"workloads": [ "workloads": []
{
"kind": "Deployment",
"name": "othrys-synapse-matrix-synapse"
}
]
}, },
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2485,7 +2532,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2505,7 +2552,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2525,7 +2572,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2545,7 +2592,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2565,7 +2612,7 @@
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2575,17 +2622,12 @@
"namespace": "comms", "namespace": "comms",
"service": "othrys-synapse-matrix-synapse", "service": "othrys-synapse-matrix-synapse",
"port": 8008, "port": 8008,
"workloads": [ "workloads": []
{
"kind": "Deployment",
"name": "othrys-synapse-matrix-synapse"
}
]
}, },
"via": { "via": {
"kind": "Ingress", "kind": "Ingress",
"name": "matrix-routing", "name": "matrix-routing",
"source": "communication" "source": "comms"
} }
}, },
{ {
@ -2608,6 +2650,26 @@
"source": "monerod" "source": "monerod"
} }
}, },
{
"host": "notes.bstein.dev",
"path": "/",
"backend": {
"namespace": "outline",
"service": "outline",
"port": 80,
"workloads": [
{
"kind": "Deployment",
"name": "outline"
}
]
},
"via": {
"kind": "Ingress",
"name": "outline",
"source": "outline"
}
},
{ {
"host": "office.bstein.dev", "host": "office.bstein.dev",
"path": "/", "path": "/",
@ -2728,6 +2790,26 @@
"source": "jellyfin" "source": "jellyfin"
} }
}, },
{
"host": "tasks.bstein.dev",
"path": "/",
"backend": {
"namespace": "planka",
"service": "planka",
"port": 80,
"workloads": [
{
"kind": "Deployment",
"name": "planka"
}
]
},
"via": {
"kind": "Ingress",
"name": "planka",
"source": "planka"
}
},
{ {
"host": "vault.bstein.dev", "host": "vault.bstein.dev",
"path": "/", "path": "/",
@ -2750,12 +2832,27 @@
} }
], ],
"helmrelease_host_hints": { "helmrelease_host_hints": {
"comms:comms/othrys-element": [
"call.live.bstein.dev",
"live.bstein.dev",
"matrix.live.bstein.dev"
],
"comms:comms/othrys-synapse": [
"bstein.dev",
"kit.live.bstein.dev",
"live.bstein.dev",
"matrix.live.bstein.dev",
"turn.live.bstein.dev"
],
"gitops-ui:flux-system/weave-gitops": [ "gitops-ui:flux-system/weave-gitops": [
"cd.bstein.dev" "cd.bstein.dev"
], ],
"harbor:harbor/harbor": [ "harbor:harbor/harbor": [
"registry.bstein.dev" "registry.bstein.dev"
], ],
"logging:logging/data-prepper": [
"registry.bstein.dev"
],
"mailu:mailu-mailserver/mailu": [ "mailu:mailu-mailserver/mailu": [
"bstein.dev", "bstein.dev",
"mail.bstein.dev" "mail.bstein.dev"
@ -2764,6 +2861,7 @@
"alerts.bstein.dev" "alerts.bstein.dev"
], ],
"monitoring:monitoring/grafana": [ "monitoring:monitoring/grafana": [
"bstein.dev",
"metrics.bstein.dev", "metrics.bstein.dev",
"sso.bstein.dev" "sso.bstein.dev"
] ]

View File

@ -1,3 +1,4 @@
# knowledge/catalog/atlas.yaml
# Generated by scripts/knowledge_render_atlas.py (do not edit by hand) # Generated by scripts/knowledge_render_atlas.py (do not edit by hand)
cluster: atlas cluster: atlas
sources: sources:
@ -7,7 +8,7 @@ sources:
- name: bstein-dev-home - name: bstein-dev-home
path: services/bstein-dev-home path: services/bstein-dev-home
targetNamespace: bstein-dev-home targetNamespace: bstein-dev-home
- name: communication - name: comms
path: services/comms path: services/comms
targetNamespace: comms targetNamespace: comms
- name: core - name: core
@ -40,12 +41,18 @@ sources:
- name: keycloak - name: keycloak
path: services/keycloak path: services/keycloak
targetNamespace: sso targetNamespace: sso
- name: logging
path: services/logging
targetNamespace: null
- name: longhorn-ui - name: longhorn-ui
path: infrastructure/longhorn/ui-ingress path: infrastructure/longhorn/ui-ingress
targetNamespace: longhorn-system targetNamespace: longhorn-system
- name: mailu - name: mailu
path: services/mailu path: services/mailu
targetNamespace: mailu-mailserver targetNamespace: mailu-mailserver
- name: maintenance
path: services/maintenance
targetNamespace: null
- name: metallb - name: metallb
path: infrastructure/metallb path: infrastructure/metallb
targetNamespace: metallb-system targetNamespace: metallb-system
@ -67,9 +74,18 @@ sources:
- name: openldap - name: openldap
path: services/openldap path: services/openldap
targetNamespace: sso targetNamespace: sso
- name: outline
path: services/outline
targetNamespace: outline
- name: pegasus - name: pegasus
path: services/pegasus path: services/pegasus
targetNamespace: jellyfin targetNamespace: jellyfin
- name: planka
path: services/planka
targetNamespace: planka
- name: postgres
path: infrastructure/postgres
targetNamespace: postgres
- name: sui-metrics - name: sui-metrics
path: services/sui-metrics/overlays/atlas path: services/sui-metrics/overlays/atlas
targetNamespace: sui-metrics targetNamespace: sui-metrics
@ -97,7 +113,7 @@ workloads:
serviceAccountName: null serviceAccountName: null
nodeSelector: {} nodeSelector: {}
images: images:
- ollama/ollama:latest - ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
- kind: Deployment - kind: Deployment
namespace: bstein-dev-home namespace: bstein-dev-home
name: bstein-dev-home-backend name: bstein-dev-home-backend
@ -108,7 +124,7 @@ workloads:
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: 'true' node-role.kubernetes.io/worker: 'true'
images: images:
- registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-84 - registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92
- kind: Deployment - kind: Deployment
namespace: bstein-dev-home namespace: bstein-dev-home
name: bstein-dev-home-frontend name: bstein-dev-home-frontend
@ -119,7 +135,7 @@ workloads:
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: 'true' node-role.kubernetes.io/worker: 'true'
images: images:
- registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-84 - registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92
- kind: Deployment - kind: Deployment
namespace: bstein-dev-home namespace: bstein-dev-home
name: chat-ai-gateway name: chat-ai-gateway
@ -160,7 +176,7 @@ workloads:
nodeSelector: nodeSelector:
hardware: rpi5 hardware: rpi5
images: images:
- ghcr.io/element-hq/element-call:latest - ghcr.io/element-hq/element-call@sha256:e6897c7818331714eae19d83ef8ea94a8b41115f0d8d3f62c2fed2d02c65c9bc
- kind: Deployment - kind: Deployment
namespace: comms namespace: comms
name: livekit name: livekit
@ -209,42 +225,6 @@ workloads:
nodeSelector: {} nodeSelector: {}
images: images:
- nginx:1.27-alpine - nginx:1.27-alpine
- kind: Deployment
namespace: comms
name: othrys-element-element-web
labels:
app.kubernetes.io/instance: othrys-element
app.kubernetes.io/name: element-web
serviceAccountName: othrys-element-element-web
nodeSelector:
hardware: rpi5
images:
- ghcr.io/element-hq/element-web:v1.12.6
- kind: Deployment
namespace: comms
name: othrys-synapse-matrix-synapse
labels:
app.kubernetes.io/component: synapse
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/name: matrix-synapse
serviceAccountName: default
nodeSelector:
hardware: rpi5
images:
- ghcr.io/element-hq/synapse:v1.144.0
- kind: Deployment
namespace: comms
name: othrys-synapse-redis-master
labels:
app.kubernetes.io/component: master
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: redis
helm.sh/chart: redis-17.17.1
serviceAccountName: othrys-synapse-redis
nodeSelector: {}
images:
- docker.io/bitnamilegacy/redis:7.0.12-debian-11-r34
- kind: DaemonSet - kind: DaemonSet
namespace: crypto namespace: crypto
name: monero-xmrig name: monero-xmrig
@ -254,7 +234,7 @@ workloads:
nodeSelector: nodeSelector:
node-role.kubernetes.io/worker: 'true' node-role.kubernetes.io/worker: 'true'
images: images:
- ghcr.io/tari-project/xmrig:latest - ghcr.io/tari-project/xmrig@sha256:80defbfd0b640d604c91cb5101d3642db7928e1e68ee3c6b011289b3565a39d9
- kind: Deployment - kind: Deployment
namespace: crypto namespace: crypto
name: monero-p2pool name: monero-p2pool
@ -447,6 +427,46 @@ workloads:
kubernetes.io/os: linux kubernetes.io/os: linux
images: images:
- hashicorp/vault-csi-provider:1.7.0 - hashicorp/vault-csi-provider:1.7.0
- kind: DaemonSet
namespace: logging
name: node-image-gc-rpi4
labels:
app: node-image-gc-rpi4
serviceAccountName: node-image-gc-rpi4
nodeSelector:
hardware: rpi4
images:
- bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
- kind: DaemonSet
namespace: logging
name: node-image-prune-rpi5
labels:
app: node-image-prune-rpi5
serviceAccountName: node-image-prune-rpi5
nodeSelector:
hardware: rpi5
images:
- bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
- kind: DaemonSet
namespace: logging
name: node-log-rotation
labels:
app: node-log-rotation
serviceAccountName: node-log-rotation
nodeSelector:
hardware: rpi5
images:
- bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
- kind: Deployment
namespace: logging
name: oauth2-proxy-logs
labels:
app: oauth2-proxy-logs
serviceAccountName: null
nodeSelector:
node-role.kubernetes.io/worker: 'true'
images:
- quay.io/oauth2-proxy/oauth2-proxy:v7.6.0
- kind: Deployment - kind: Deployment
namespace: longhorn-system namespace: longhorn-system
name: oauth2-proxy-longhorn name: oauth2-proxy-longhorn
@ -466,7 +486,7 @@ workloads:
nodeSelector: nodeSelector:
mailu.bstein.dev/vip: 'true' mailu.bstein.dev/vip: 'true'
images: images:
- lachlanevenson/k8s-kubectl:latest - registry.bstein.dev/bstein/kubectl:1.35.0
- kind: Deployment - kind: Deployment
namespace: mailu-mailserver namespace: mailu-mailserver
name: mailu-sync-listener name: mailu-sync-listener
@ -477,30 +497,24 @@ workloads:
images: images:
- python:3.11-alpine - python:3.11-alpine
- kind: DaemonSet - kind: DaemonSet
namespace: metallb-system namespace: maintenance
name: metallb-speaker name: node-image-sweeper
labels: labels:
app.kubernetes.io/component: speaker app: node-image-sweeper
app.kubernetes.io/instance: metallb serviceAccountName: node-image-sweeper
app.kubernetes.io/name: metallb
serviceAccountName: metallb-speaker
nodeSelector: nodeSelector:
kubernetes.io/os: linux kubernetes.io/os: linux
images: images:
- quay.io/frrouting/frr:10.4.1 - python:3.12.9-alpine3.20
- quay.io/metallb/speaker:v0.15.3 - kind: DaemonSet
- kind: Deployment namespace: maintenance
namespace: metallb-system name: node-nofile
name: metallb-controller
labels: labels:
app.kubernetes.io/component: controller app: node-nofile
app.kubernetes.io/instance: metallb serviceAccountName: node-nofile
app.kubernetes.io/name: metallb nodeSelector: {}
serviceAccountName: metallb-controller
nodeSelector:
kubernetes.io/os: linux
images: images:
- quay.io/metallb/controller:v0.15.3 - bitnami/kubectl@sha256:554ab88b1858e8424c55de37ad417b16f2a0e65d1607aa0f3fe3ce9b9f10b131
- kind: DaemonSet - kind: DaemonSet
namespace: monitoring namespace: monitoring
name: dcgm-exporter name: dcgm-exporter
@ -510,6 +524,16 @@ workloads:
nodeSelector: {} nodeSelector: {}
images: images:
- registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04 - registry.bstein.dev/monitoring/dcgm-exporter:4.4.2-4.7.0-ubuntu22.04
- kind: DaemonSet
namespace: monitoring
name: jetson-tegrastats-exporter
labels:
app: jetson-tegrastats-exporter
serviceAccountName: default
nodeSelector:
jetson: 'true'
images:
- python:3.10-slim
- kind: Deployment - kind: Deployment
namespace: monitoring namespace: monitoring
name: postmark-exporter name: postmark-exporter
@ -528,7 +552,7 @@ workloads:
nodeSelector: nodeSelector:
hardware: rpi5 hardware: rpi5
images: images:
- collabora/code:latest - collabora/code@sha256:3c58d0e9bae75e4647467d0c7d91cb66f261d3e814709aed590b5c334a04db26
- kind: Deployment - kind: Deployment
namespace: nextcloud namespace: nextcloud
name: nextcloud name: nextcloud
@ -539,6 +563,46 @@ workloads:
hardware: rpi5 hardware: rpi5
images: images:
- nextcloud:29-apache - nextcloud:29-apache
- kind: Deployment
namespace: outline
name: outline
labels:
app: outline
serviceAccountName: null
nodeSelector:
node-role.kubernetes.io/worker: 'true'
images:
- outlinewiki/outline:1.2.0
- kind: Deployment
namespace: outline
name: outline-redis
labels:
app: outline-redis
serviceAccountName: null
nodeSelector:
node-role.kubernetes.io/worker: 'true'
images:
- redis:7.4.1-alpine
- kind: Deployment
namespace: planka
name: planka
labels:
app: planka
serviceAccountName: null
nodeSelector:
node-role.kubernetes.io/worker: 'true'
images:
- ghcr.io/plankanban/planka:2.0.0-rc.4
- kind: StatefulSet
namespace: postgres
name: postgres
labels:
app: postgres
serviceAccountName: postgres-vault
nodeSelector:
node-role.kubernetes.io/worker: 'true'
images:
- postgres:15
- kind: Deployment - kind: Deployment
namespace: sso namespace: sso
name: keycloak name: keycloak
@ -650,16 +714,6 @@ services:
port: 80 port: 80
targetPort: 8080 targetPort: 8080
protocol: TCP protocol: TCP
- namespace: ci-demo
name: ci-demo
type: ClusterIP
selector:
app.kubernetes.io/name: ci-demo
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
- namespace: comms - namespace: comms
name: coturn name: coturn
type: LoadBalancer type: LoadBalancer
@ -958,64 +1012,6 @@ services:
port: 80 port: 80
targetPort: 80 targetPort: 80
protocol: TCP protocol: TCP
- namespace: comms
name: othrys-element-element-web
type: ClusterIP
selector:
app.kubernetes.io/instance: othrys-element
app.kubernetes.io/name: element-web
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
- namespace: comms
name: othrys-synapse-matrix-synapse
type: ClusterIP
selector:
app.kubernetes.io/component: synapse
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/name: matrix-synapse
ports:
- name: http
port: 8008
targetPort: http
protocol: TCP
- namespace: comms
name: othrys-synapse-redis-headless
type: ClusterIP
selector:
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/name: redis
ports:
- name: tcp-redis
port: 6379
targetPort: redis
protocol: TCP
- namespace: comms
name: othrys-synapse-redis-master
type: ClusterIP
selector:
app.kubernetes.io/component: master
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/name: redis
ports:
- name: tcp-redis
port: 6379
targetPort: redis
protocol: TCP
- namespace: comms
name: othrys-synapse-replication
type: ClusterIP
selector:
app.kubernetes.io/component: synapse
app.kubernetes.io/instance: othrys-synapse
app.kubernetes.io/name: matrix-synapse
ports:
- name: replication
port: 9093
targetPort: replication
protocol: TCP
- namespace: crypto - namespace: crypto
name: monerod name: monerod
type: ClusterIP type: ClusterIP
@ -1143,6 +1139,16 @@ services:
port: 443 port: 443
targetPort: websecure targetPort: websecure
protocol: TCP protocol: TCP
- namespace: logging
name: oauth2-proxy-logs
type: ClusterIP
selector:
app: oauth2-proxy-logs
ports:
- name: http
port: 80
targetPort: 4180
protocol: TCP
- namespace: longhorn-system - namespace: longhorn-system
name: oauth2-proxy-longhorn name: oauth2-proxy-longhorn
type: ClusterIP type: ClusterIP
@ -1195,18 +1201,6 @@ services:
port: 8080 port: 8080
targetPort: 8080 targetPort: 8080
protocol: TCP protocol: TCP
- namespace: metallb-system
name: metallb-webhook-service
type: ClusterIP
selector:
app.kubernetes.io/component: controller
app.kubernetes.io/instance: metallb
app.kubernetes.io/name: metallb
ports:
- name: null
port: 443
targetPort: 9443
protocol: TCP
- namespace: monitoring - namespace: monitoring
name: dcgm-exporter name: dcgm-exporter
type: ClusterIP type: ClusterIP
@ -1217,6 +1211,16 @@ services:
port: 9400 port: 9400
targetPort: metrics targetPort: metrics
protocol: TCP protocol: TCP
- namespace: monitoring
name: jetson-tegrastats-exporter
type: ClusterIP
selector:
app: jetson-tegrastats-exporter
ports:
- name: metrics
port: 9100
targetPort: metrics
protocol: TCP
- namespace: monitoring - namespace: monitoring
name: postmark-exporter name: postmark-exporter
type: ClusterIP type: ClusterIP
@ -1247,6 +1251,46 @@ services:
port: 80 port: 80
targetPort: http targetPort: http
protocol: TCP protocol: TCP
- namespace: outline
name: outline
type: ClusterIP
selector:
app: outline
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
- namespace: outline
name: outline-redis
type: ClusterIP
selector:
app: outline-redis
ports:
- name: redis
port: 6379
targetPort: redis
protocol: TCP
- namespace: planka
name: planka
type: ClusterIP
selector:
app: planka
ports:
- name: http
port: 80
targetPort: http
protocol: TCP
- namespace: postgres
name: postgres-service
type: ClusterIP
selector:
app: postgres
ports:
- name: postgres
port: 5432
targetPort: 5432
protocol: TCP
- namespace: sso - namespace: sso
name: keycloak name: keycloak
type: ClusterIP type: ClusterIP
@ -1378,7 +1422,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown-bstein-dev name: matrix-wellknown-bstein-dev
source: communication source: comms
- host: bstein.dev - host: bstein.dev
path: /.well-known/matrix/server path: /.well-known/matrix/server
backend: backend:
@ -1389,7 +1433,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown-bstein-dev name: matrix-wellknown-bstein-dev
source: communication source: comms
- host: bstein.dev - host: bstein.dev
path: /api path: /api
backend: backend:
@ -1415,7 +1459,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: element-call name: element-call
source: communication source: comms
- host: chat.ai.bstein.dev - host: chat.ai.bstein.dev
path: / path: /
backend: backend:
@ -1467,7 +1511,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: livekit-jwt-ingress name: livekit-jwt-ingress
source: communication source: comms
- host: kit.live.bstein.dev - host: kit.live.bstein.dev
path: /livekit/sfu path: /livekit/sfu
backend: backend:
@ -1480,20 +1524,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: livekit-ingress name: livekit-ingress
source: communication source: comms
- host: live.bstein.dev
path: /
backend:
namespace: comms
service: othrys-element-element-web
port: 80
workloads:
- kind: Deployment
name: othrys-element-element-web
via:
kind: Ingress
name: othrys-element-element-web
source: communication
- host: live.bstein.dev - host: live.bstein.dev
path: /.well-known/matrix/client path: /.well-known/matrix/client
backend: backend:
@ -1504,7 +1535,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown name: matrix-wellknown
source: communication source: comms
- host: live.bstein.dev - host: live.bstein.dev
path: /.well-known/matrix/server path: /.well-known/matrix/server
backend: backend:
@ -1515,20 +1546,31 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown name: matrix-wellknown
source: communication source: comms
- host: live.bstein.dev - host: live.bstein.dev
path: /_matrix path: /_matrix
backend: backend:
namespace: comms namespace: comms
service: othrys-synapse-matrix-synapse service: othrys-synapse-matrix-synapse
port: 8008 port: 8008
workloads: &id002 workloads: []
- kind: Deployment
name: othrys-synapse-matrix-synapse
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: logs.bstein.dev
path: /
backend:
namespace: logging
service: oauth2-proxy-logs
port: http
workloads:
- kind: Deployment
name: oauth2-proxy-logs
via:
kind: Ingress
name: logs
source: logging
- host: longhorn.bstein.dev - host: longhorn.bstein.dev
path: / path: /
backend: backend:
@ -1559,13 +1601,13 @@ http_endpoints:
namespace: comms namespace: comms
service: matrix-authentication-service service: matrix-authentication-service
port: 8080 port: 8080
workloads: &id003 workloads: &id002
- kind: Deployment - kind: Deployment
name: matrix-authentication-service name: matrix-authentication-service
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /.well-known/matrix/client path: /.well-known/matrix/client
backend: backend:
@ -1576,7 +1618,7 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown-matrix-live name: matrix-wellknown-matrix-live
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /.well-known/matrix/server path: /.well-known/matrix/server
backend: backend:
@ -1587,86 +1629,86 @@ http_endpoints:
via: via:
kind: Ingress kind: Ingress
name: matrix-wellknown-matrix-live name: matrix-wellknown-matrix-live
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix path: /_matrix
backend: backend:
namespace: comms namespace: comms
service: othrys-synapse-matrix-synapse service: othrys-synapse-matrix-synapse
port: 8008 port: 8008
workloads: *id002 workloads: []
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix/client/r0/register path: /_matrix/client/r0/register
backend: backend:
namespace: comms namespace: comms
service: matrix-guest-register service: matrix-guest-register
port: 8080 port: 8080
workloads: &id004 workloads: &id003
- kind: Deployment - kind: Deployment
name: matrix-guest-register name: matrix-guest-register
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix/client/v3/login path: /_matrix/client/v3/login
backend: backend:
namespace: comms namespace: comms
service: matrix-authentication-service service: matrix-authentication-service
port: 8080 port: 8080
workloads: *id003 workloads: *id002
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix/client/v3/logout path: /_matrix/client/v3/logout
backend: backend:
namespace: comms namespace: comms
service: matrix-authentication-service service: matrix-authentication-service
port: 8080 port: 8080
workloads: *id003 workloads: *id002
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix/client/v3/refresh path: /_matrix/client/v3/refresh
backend: backend:
namespace: comms namespace: comms
service: matrix-authentication-service service: matrix-authentication-service
port: 8080 port: 8080
workloads: *id003 workloads: *id002
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_matrix/client/v3/register path: /_matrix/client/v3/register
backend: backend:
namespace: comms namespace: comms
service: matrix-guest-register service: matrix-guest-register
port: 8080 port: 8080
workloads: *id004 workloads: *id003
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: matrix.live.bstein.dev - host: matrix.live.bstein.dev
path: /_synapse path: /_synapse
backend: backend:
namespace: comms namespace: comms
service: othrys-synapse-matrix-synapse service: othrys-synapse-matrix-synapse
port: 8008 port: 8008
workloads: *id002 workloads: []
via: via:
kind: Ingress kind: Ingress
name: matrix-routing name: matrix-routing
source: communication source: comms
- host: monero.bstein.dev - host: monero.bstein.dev
path: / path: /
backend: backend:
@ -1680,6 +1722,19 @@ http_endpoints:
kind: Ingress kind: Ingress
name: monerod name: monerod
source: monerod source: monerod
- host: notes.bstein.dev
path: /
backend:
namespace: outline
service: outline
port: 80
workloads:
- kind: Deployment
name: outline
via:
kind: Ingress
name: outline
source: outline
- host: office.bstein.dev - host: office.bstein.dev
path: / path: /
backend: backend:
@ -1758,6 +1813,19 @@ http_endpoints:
kind: Ingress kind: Ingress
name: jellyfin name: jellyfin
source: jellyfin source: jellyfin
- host: tasks.bstein.dev
path: /
backend:
namespace: planka
service: planka
port: 80
workloads:
- kind: Deployment
name: planka
via:
kind: Ingress
name: planka
source: planka
- host: vault.bstein.dev - host: vault.bstein.dev
path: / path: /
backend: backend:
@ -1772,15 +1840,28 @@ http_endpoints:
name: vaultwarden-ingress name: vaultwarden-ingress
source: vaultwarden source: vaultwarden
helmrelease_host_hints: helmrelease_host_hints:
comms:comms/othrys-element:
- call.live.bstein.dev
- live.bstein.dev
- matrix.live.bstein.dev
comms:comms/othrys-synapse:
- bstein.dev
- kit.live.bstein.dev
- live.bstein.dev
- matrix.live.bstein.dev
- turn.live.bstein.dev
gitops-ui:flux-system/weave-gitops: gitops-ui:flux-system/weave-gitops:
- cd.bstein.dev - cd.bstein.dev
harbor:harbor/harbor: harbor:harbor/harbor:
- registry.bstein.dev - registry.bstein.dev
logging:logging/data-prepper:
- registry.bstein.dev
mailu:mailu-mailserver/mailu: mailu:mailu-mailserver/mailu:
- bstein.dev - bstein.dev
- mail.bstein.dev - mail.bstein.dev
monitoring:monitoring/alertmanager: monitoring:monitoring/alertmanager:
- alerts.bstein.dev - alerts.bstein.dev
monitoring:monitoring/grafana: monitoring:monitoring/grafana:
- bstein.dev
- metrics.bstein.dev - metrics.bstein.dev
- sso.bstein.dev - sso.bstein.dev

View File

@ -47,15 +47,14 @@ flowchart LR
wl_comms_livekit["comms/livekit (Deployment)"] wl_comms_livekit["comms/livekit (Deployment)"]
svc_comms_livekit --> wl_comms_livekit svc_comms_livekit --> wl_comms_livekit
host_live_bstein_dev["live.bstein.dev"] host_live_bstein_dev["live.bstein.dev"]
svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"]
host_live_bstein_dev --> svc_comms_othrys_element_element_web
wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"]
svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web
host_live_bstein_dev --> svc_comms_matrix_wellknown host_live_bstein_dev --> svc_comms_matrix_wellknown
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"] svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"] host_logs_bstein_dev["logs.bstein.dev"]
svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse svc_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Service)"]
host_logs_bstein_dev --> svc_logging_oauth2_proxy_logs
wl_logging_oauth2_proxy_logs["logging/oauth2-proxy-logs (Deployment)"]
svc_logging_oauth2_proxy_logs --> wl_logging_oauth2_proxy_logs
host_longhorn_bstein_dev["longhorn.bstein.dev"] host_longhorn_bstein_dev["longhorn.bstein.dev"]
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"] svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
@ -80,6 +79,11 @@ flowchart LR
host_monero_bstein_dev --> svc_crypto_monerod host_monero_bstein_dev --> svc_crypto_monerod
wl_crypto_monerod["crypto/monerod (Deployment)"] wl_crypto_monerod["crypto/monerod (Deployment)"]
svc_crypto_monerod --> wl_crypto_monerod svc_crypto_monerod --> wl_crypto_monerod
host_notes_bstein_dev["notes.bstein.dev"]
svc_outline_outline["outline/outline (Service)"]
host_notes_bstein_dev --> svc_outline_outline
wl_outline_outline["outline/outline (Deployment)"]
svc_outline_outline --> wl_outline_outline
host_office_bstein_dev["office.bstein.dev"] host_office_bstein_dev["office.bstein.dev"]
svc_nextcloud_collabora["nextcloud/collabora (Service)"] svc_nextcloud_collabora["nextcloud/collabora (Service)"]
host_office_bstein_dev --> svc_nextcloud_collabora host_office_bstein_dev --> svc_nextcloud_collabora
@ -110,6 +114,11 @@ flowchart LR
host_stream_bstein_dev --> svc_jellyfin_jellyfin host_stream_bstein_dev --> svc_jellyfin_jellyfin
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"] wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
host_tasks_bstein_dev["tasks.bstein.dev"]
svc_planka_planka["planka/planka (Service)"]
host_tasks_bstein_dev --> svc_planka_planka
wl_planka_planka["planka/planka (Deployment)"]
svc_planka_planka --> wl_planka_planka
host_vault_bstein_dev["vault.bstein.dev"] host_vault_bstein_dev["vault.bstein.dev"]
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"] svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
@ -133,10 +142,7 @@ flowchart LR
wl_comms_livekit_token_service wl_comms_livekit_token_service
svc_comms_livekit svc_comms_livekit
wl_comms_livekit wl_comms_livekit
svc_comms_othrys_element_element_web
wl_comms_othrys_element_element_web
svc_comms_othrys_synapse_matrix_synapse svc_comms_othrys_synapse_matrix_synapse
wl_comms_othrys_synapse_matrix_synapse
svc_comms_matrix_authentication_service svc_comms_matrix_authentication_service
wl_comms_matrix_authentication_service wl_comms_matrix_authentication_service
svc_comms_matrix_guest_register svc_comms_matrix_guest_register
@ -160,6 +166,10 @@ flowchart LR
svc_jenkins_jenkins svc_jenkins_jenkins
wl_jenkins_jenkins wl_jenkins_jenkins
end end
subgraph logging[logging]
svc_logging_oauth2_proxy_logs
wl_logging_oauth2_proxy_logs
end
subgraph longhorn_system[longhorn-system] subgraph longhorn_system[longhorn-system]
svc_longhorn_system_oauth2_proxy_longhorn svc_longhorn_system_oauth2_proxy_longhorn
wl_longhorn_system_oauth2_proxy_longhorn wl_longhorn_system_oauth2_proxy_longhorn
@ -173,6 +183,14 @@ flowchart LR
svc_nextcloud_collabora svc_nextcloud_collabora
wl_nextcloud_collabora wl_nextcloud_collabora
end end
subgraph outline[outline]
svc_outline_outline
wl_outline_outline
end
subgraph planka[planka]
svc_planka_planka
wl_planka_planka
end
subgraph sso[sso] subgraph sso[sso]
svc_sso_oauth2_proxy svc_sso_oauth2_proxy
wl_sso_oauth2_proxy wl_sso_oauth2_proxy

View File

@ -85,19 +85,17 @@ WORKER_TOTAL = len(WORKER_NODES)
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}" CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
WORKER_SUFFIX = f"/{WORKER_TOTAL}" WORKER_SUFFIX = f"/{WORKER_TOTAL}"
# Namespaces considered infrastructure (excluded from workload counts) # Namespaces considered infrastructure (excluded from workload counts)
INFRA_NAMESPACES = [ INFRA_PATTERNS = [
"kube-system", "kube-.*",
"longhorn-system", ".*-system",
"metallb-system", "traefik",
"monitoring", "monitoring",
"logging", "logging",
"cert-manager", "cert-manager",
"flux-system",
"traefik",
"maintenance", "maintenance",
"postgres", "postgres",
] ]
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$" INFRA_REGEX = f"^({'|'.join(INFRA_PATTERNS)})$"
# Namespaces allowed on control plane without counting as workloads # Namespaces allowed on control plane without counting as workloads
CP_ALLOWED_NS = INFRA_REGEX CP_ALLOWED_NS = INFRA_REGEX
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]" LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
@ -319,6 +317,25 @@ NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_ALL = 'namespace=~".*"' NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"' NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"] NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
GLUE_LABEL = 'label_atlas_bstein_dev_glue="true"'
GLUE_JOBS = f"kube_cronjob_labels{{{GLUE_LABEL}}}"
GLUE_FILTER = f"and on(namespace,cronjob) {GLUE_JOBS}"
GLUE_LAST_SUCCESS = f"(kube_cronjob_status_last_successful_time {GLUE_FILTER})"
GLUE_LAST_SCHEDULE = f"(kube_cronjob_status_last_schedule_time {GLUE_FILTER})"
GLUE_SUSPENDED = f"(kube_cronjob_spec_suspend {GLUE_FILTER}) == 1"
GLUE_ACTIVE = f"(kube_cronjob_status_active {GLUE_FILTER})"
GLUE_LAST_SUCCESS_AGE = f"(time() - {GLUE_LAST_SUCCESS})"
GLUE_LAST_SCHEDULE_AGE = f"(time() - {GLUE_LAST_SCHEDULE})"
GLUE_LAST_SUCCESS_AGE_HOURS = f"({GLUE_LAST_SUCCESS_AGE}) / 3600"
GLUE_LAST_SCHEDULE_AGE_HOURS = f"({GLUE_LAST_SCHEDULE_AGE}) / 3600"
GLUE_STALE_WINDOW_SEC = 36 * 3600
GLUE_STALE = f"({GLUE_LAST_SUCCESS_AGE} > bool {GLUE_STALE_WINDOW_SEC})"
GLUE_MISSING = f"({GLUE_JOBS} unless on(namespace,cronjob) kube_cronjob_status_last_successful_time)"
GLUE_STALE_ACTIVE = f"({GLUE_STALE} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_MISSING_ACTIVE = f"({GLUE_MISSING} unless on(namespace,cronjob) {GLUE_SUSPENDED})"
GLUE_STALE_COUNT = f"(sum({GLUE_STALE_ACTIVE}) + count({GLUE_MISSING_ACTIVE}))"
GLUE_MISSING_COUNT = f"count({GLUE_MISSING_ACTIVE})"
GLUE_SUSPENDED_COUNT = f"sum({GLUE_SUSPENDED})"
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"] GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
GPU_NODE_REGEX = "|".join(GPU_NODES) GPU_NODE_REGEX = "|".join(GPU_NODES)
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))" TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
@ -965,7 +982,7 @@ def build_overview():
30, 30,
"Mail Sent (1d)", "Mail Sent (1d)",
'max(postmark_outbound_sent{window="1d"})', 'max(postmark_outbound_sent{window="1d"})',
{"h": 2, "w": 6, "x": 0, "y": 8}, {"h": 2, "w": 5, "x": 0, "y": 8},
unit="none", unit="none",
links=link_to("atlas-mail"), links=link_to("atlas-mail"),
) )
@ -976,7 +993,7 @@ def build_overview():
"type": "stat", "type": "stat",
"title": "Mail Bounces (1d)", "title": "Mail Bounces (1d)",
"datasource": PROM_DS, "datasource": PROM_DS,
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8}, "gridPos": {"h": 2, "w": 5, "x": 10, "y": 8},
"targets": [ "targets": [
{ {
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})', "expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
@ -1022,7 +1039,7 @@ def build_overview():
32, 32,
"Mail Success Rate (1d)", "Mail Success Rate (1d)",
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)', 'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
{"h": 2, "w": 6, "x": 6, "y": 8}, {"h": 2, "w": 5, "x": 5, "y": 8},
unit="percent", unit="percent",
thresholds=mail_success_thresholds, thresholds=mail_success_thresholds,
decimals=1, decimals=1,
@ -1034,7 +1051,7 @@ def build_overview():
33, 33,
"Mail Limit Used (30d)", "Mail Limit Used (30d)",
"max(postmark_sending_limit_used_percent)", "max(postmark_sending_limit_used_percent)",
{"h": 2, "w": 6, "x": 18, "y": 8}, {"h": 2, "w": 5, "x": 15, "y": 8},
unit="percent", unit="percent",
thresholds=mail_limit_thresholds, thresholds=mail_limit_thresholds,
decimals=1, decimals=1,
@ -1072,7 +1089,7 @@ def build_overview():
namespace_cpu_share_expr(cpu_scope), namespace_cpu_share_expr(cpu_scope),
{"h": 9, "w": 8, "x": 0, "y": 16}, {"h": 9, "w": 8, "x": 0, "y": 16},
links=namespace_scope_links("namespace_scope_cpu"), links=namespace_scope_links("namespace_scope_cpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.", description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
) )
) )
panels.append( panels.append(
@ -1082,7 +1099,7 @@ def build_overview():
namespace_gpu_share_expr(gpu_scope), namespace_gpu_share_expr(gpu_scope),
{"h": 9, "w": 8, "x": 8, "y": 16}, {"h": 9, "w": 8, "x": 8, "y": 16},
links=namespace_scope_links("namespace_scope_gpu"), links=namespace_scope_links("namespace_scope_gpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.", description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
) )
) )
panels.append( panels.append(
@ -1092,7 +1109,7 @@ def build_overview():
namespace_ram_share_expr(ram_scope), namespace_ram_share_expr(ram_scope),
{"h": 9, "w": 8, "x": 16, "y": 16}, {"h": 9, "w": 8, "x": 16, "y": 16},
links=namespace_scope_links("namespace_scope_ram"), links=namespace_scope_links("namespace_scope_ram"),
description="Values are normalized within the selected scope; use panel links to switch scope.", description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
) )
) )
@ -1727,7 +1744,7 @@ def build_storage_dashboard():
stat_panel( stat_panel(
31, 31,
"Maintenance Cron Freshness (s)", "Maintenance Cron Freshness (s)",
'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})', 'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob="image-sweeper"})',
{"h": 4, "w": 12, "x": 12, "y": 44}, {"h": 4, "w": 12, "x": 12, "y": 44},
unit="s", unit="s",
thresholds={ thresholds={
@ -2136,6 +2153,98 @@ def build_mail_dashboard():
} }
def build_testing_dashboard():
    """Build the private "Atlas Testing" dashboard for glue-job health.

    One stat panel summarizes stale/missing glue jobs; five instant tables
    break the fleet down by missing-success, suspension, active runs, and
    last-success / last-schedule ages.
    """
    sort_desc = [
        {"id": "labelsToFields", "options": {}},
        {"id": "sortBy", "options": {"fields": ["Value"], "order": "desc"}},
    ]
    panels = [
        stat_panel(
            1,
            "Glue Jobs Stale (>36h)",
            GLUE_STALE_COUNT,
            {"h": 4, "w": 6, "x": 0, "y": 0},
            unit="none",
            thresholds={
                "mode": "absolute",
                "steps": [
                    {"color": "green", "value": None},
                    {"color": "yellow", "value": 1},
                    {"color": "orange", "value": 2},
                    {"color": "red", "value": 3},
                ],
            },
        )
    ]
    # (panel id, title, expression, grid position, unit) for each table panel.
    table_specs = [
        (2, "Glue Jobs Missing Success", GLUE_MISSING_ACTIVE,
         {"h": 4, "w": 6, "x": 6, "y": 0}, "none"),
        (3, "Glue Jobs Suspended", GLUE_SUSPENDED,
         {"h": 4, "w": 6, "x": 12, "y": 0}, "none"),
        (4, "Glue Jobs Active Runs", GLUE_ACTIVE,
         {"h": 4, "w": 6, "x": 18, "y": 0}, "none"),
        (5, "Glue Jobs Last Success (hours ago)", GLUE_LAST_SUCCESS_AGE_HOURS,
         {"h": 8, "w": 12, "x": 0, "y": 4}, "h"),
        (6, "Glue Jobs Last Schedule (hours ago)", GLUE_LAST_SCHEDULE_AGE_HOURS,
         {"h": 8, "w": 12, "x": 12, "y": 4}, "h"),
    ]
    for panel_id, title, expr, grid_pos, unit in table_specs:
        panels.append(
            table_panel(
                panel_id,
                title,
                expr,
                grid_pos,
                unit=unit,
                transformations=sort_desc,
                instant=True,
            )
        )
    return {
        "uid": "atlas-testing",
        "title": "Atlas Testing",
        "folderUid": PRIVATE_FOLDER,
        "editable": True,
        "panels": panels,
        "time": {"from": "now-7d", "to": "now"},
        "annotations": {"list": []},
        "schemaVersion": 39,
        "style": "dark",
        "tags": ["atlas", "testing"],
    }
def build_gpu_dashboard(): def build_gpu_dashboard():
panels = [] panels = []
gpu_scope = "$namespace_scope_gpu" gpu_scope = "$namespace_scope_gpu"
@ -2146,7 +2255,7 @@ def build_gpu_dashboard():
namespace_gpu_share_expr(gpu_scope), namespace_gpu_share_expr(gpu_scope),
{"h": 8, "w": 12, "x": 0, "y": 0}, {"h": 8, "w": 12, "x": 0, "y": 0},
links=namespace_scope_links("namespace_scope_gpu"), links=namespace_scope_links("namespace_scope_gpu"),
description="Values are normalized within the selected scope; use panel links to switch scope.", description="Shares are normalized within the selected filter. Switching scope changes the denominator.",
) )
) )
panels.append( panels.append(
@ -2229,6 +2338,10 @@ DASHBOARDS = {
"builder": build_mail_dashboard, "builder": build_mail_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml", "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
}, },
"atlas-testing": {
"builder": build_testing_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-testing.yaml",
},
"atlas-gpu": { "atlas-gpu": {
"builder": build_gpu_dashboard, "builder": build_gpu_dashboard,
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml", "configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",

View File

@ -505,7 +505,9 @@ def main() -> int:
diagram_path = out_dir / "diagrams" / "atlas-http.mmd" diagram_path = out_dir / "diagrams" / "atlas-http.mmd"
runbooks_json_path = out_dir / "catalog" / "runbooks.json" runbooks_json_path = out_dir / "catalog" / "runbooks.json"
catalog_rel = catalog_path.relative_to(REPO_ROOT).as_posix()
catalog_path.write_text( catalog_path.write_text(
f"# {catalog_rel}\n"
"# Generated by scripts/knowledge_render_atlas.py (do not edit by hand)\n" "# Generated by scripts/knowledge_render_atlas.py (do not edit by hand)\n"
+ yaml.safe_dump(catalog, sort_keys=False), + yaml.safe_dump(catalog, sort_keys=False),
encoding="utf-8", encoding="utf-8",

View File

@ -7,6 +7,8 @@ test accounts created via the bstein-dev-home onboarding portal.
Targets (best-effort): Targets (best-effort):
- Keycloak users in realm "atlas" - Keycloak users in realm "atlas"
- Atlas portal Postgres rows (access_requests + dependent tables) - Atlas portal Postgres rows (access_requests + dependent tables)
- Mailu mailboxes created for test users
- Nextcloud Mail accounts created for test users
- Vaultwarden users/invites created by the portal - Vaultwarden users/invites created by the portal
Safety: Safety:
@ -56,6 +58,19 @@ class VaultwardenUser:
status: int status: int
@dataclass(frozen=True)
class MailuUser:
    """One Mailu mailbox row selected from the mail DB's "user" table."""

    # email is the full address; localpart/domain are its two halves as stored.
    email: str
    localpart: str
    domain: str
@dataclass(frozen=True)
class NextcloudMailAccount:
    """One Nextcloud Mail account parsed from `occ mail:account:export`."""

    # account_id is the numeric id occ prints (kept as a string for occ CLI use).
    account_id: str
    email: str
def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str: def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str:
proc = subprocess.run( proc = subprocess.run(
cmd, cmd,
@ -70,6 +85,19 @@ def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str:
return proc.stdout.decode("utf-8", errors="replace") return proc.stdout.decode("utf-8", errors="replace")
def _run_capture(cmd: list[str], *, input_bytes: bytes | None = None) -> tuple[int, str, str]:
proc = subprocess.run(
cmd,
input=input_bytes,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=False,
)
stdout = proc.stdout.decode("utf-8", errors="replace")
stderr = proc.stderr.decode("utf-8", errors="replace")
return proc.returncode, stdout, stderr
def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str: def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str:
raw_b64 = _run( raw_b64 = _run(
[ [
@ -110,6 +138,21 @@ def _kubectl_first_pod(namespace: str) -> str:
return pod_name return pod_name
def _kubectl_exec(namespace: str, target: str, cmd: list[str]) -> tuple[int, str, str]:
    """Exec *cmd* in *target* (pod or workload ref) inside *namespace*.

    Returns (returncode, stdout, stderr) without raising on failure.
    """
    kubectl_cmd = ["kubectl", "-n", namespace, "exec", "-i", target, "--", *cmd]
    return _run_capture(kubectl_cmd)
def _validate_prefixes(prefixes: list[str]) -> list[str]: def _validate_prefixes(prefixes: list[str]) -> list[str]:
cleaned: list[str] = [] cleaned: list[str] = []
for prefix in prefixes: for prefix in prefixes:
@ -187,6 +230,62 @@ def _keycloak_delete_user(server: str, realm: str, token: str, user_id: str) ->
raise raise
def _sql_quote(value: str) -> str:
return "'" + value.replace("'", "''") + "'"
def _psql_exec(db_name: str, sql: str, *, user: str = "postgres") -> str:
    """Run *sql* against *db_name* via psql in the first postgres pod.

    Raises through _run if kubectl/psql exit non-zero; returns raw stdout.
    """
    pod = _kubectl_first_pod("postgres")
    psql_cmd = [
        "kubectl", "-n", "postgres", "exec", "-i", pod, "--",
        "psql", "-U", user, "-d", db_name, "-c", sql,
    ]
    return _run(psql_cmd)
def _psql_tsv(db_name: str, sql: str, *, user: str = "postgres") -> list[list[str]]:
    """Run *sql* and return rows parsed from psql tuples-only TSV output.

    -At prints one tab-separated data row per line with no header/footer,
    so each output line maps directly to one result row.
    """
    pod = _kubectl_first_pod("postgres")
    out = _run(
        [
            "kubectl", "-n", "postgres", "exec", "-i", pod, "--",
            "psql", "-U", user, "-d", db_name, "-At", "-F", "\t", "-c", sql,
        ]
    )
    # Comprehension replaces the manual append loop; empty output -> [].
    return [line.split("\t") for line in out.splitlines()]
def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]: def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]:
postgres_pod = _kubectl_first_pod("postgres") postgres_pod = _kubectl_first_pod("postgres")
out = _run( out = _run(
@ -256,6 +355,89 @@ def _portal_delete_requests(portal_db_url: str, prefixes: list[str]) -> int:
return int(match.group(1)) if match else 0 return int(match.group(1)) if match else 0
def _mailu_list_users(prefixes: list[str], domain: str, db_name: str, protected: set[str]) -> list[MailuUser]:
    """List Mailu mailboxes in *domain* whose localpart starts with a prefix.

    Emails in *protected* are skipped. Returns [] when there is nothing to
    match (no prefixes or no domain).
    """
    if not prefixes or not domain:
        return []
    # Quote each prefix via _sql_quote instead of interpolating it raw into
    # the LIKE pattern (prefixes are caller-validated, but quoting keeps the
    # statement well-formed for any input). NOTE(review): LIKE wildcards
    # (%/_) inside a prefix are not escaped — confirm _validate_prefixes
    # rejects them.
    clauses = " OR ".join(f"localpart LIKE {_sql_quote(p + '%')}" for p in prefixes)
    sql = (
        'SELECT email, localpart, domain_name '
        'FROM "user" '
        f"WHERE domain_name = {_sql_quote(domain)} AND ({clauses}) "
        "ORDER BY email;"
    )
    users: list[MailuUser] = []
    for row in _psql_tsv(db_name, sql):
        if len(row) < 3:
            continue
        email = row[0].strip()
        if not email or email in protected:
            continue
        users.append(MailuUser(email=email, localpart=row[1].strip(), domain=row[2].strip()))
    return users
def _mailu_delete_users(db_name: str, emails: list[str]) -> int:
    """Delete the given Mailu mailboxes and return the number of rows removed."""
    if not emails:
        return 0
    email_list = ",".join(_sql_quote(e) for e in emails)
    sql = f'DELETE FROM "user" WHERE email IN ({email_list});'
    out = _psql_exec(db_name, sql)
    # psql reports e.g. "DELETE 3". The previous pattern r"DELETE\\s+(\\d+)"
    # (doubled backslashes inside a raw string) looked for a literal backslash
    # and never matched, so this always reported 0 deletions.
    match = re.search(r"DELETE\s+(\d+)", out)
    return int(match.group(1)) if match else 0
_NEXTCLOUD_ACCOUNT_RE = re.compile(r"^Account\\s+(\\d+):")
_EMAIL_RE = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+")
def _nextcloud_exec(cmd: list[str]) -> tuple[int, str, str]:
    """Run *cmd* inside the Nextcloud workload; target is env-overridable."""
    def env_or(name: str, default: str) -> str:
        # Fall back to the default when unset, empty, or whitespace-only.
        value = os.getenv(name, default).strip()
        return value or default

    return _kubectl_exec(
        env_or("NEXTCLOUD_NAMESPACE", "nextcloud"),
        env_or("NEXTCLOUD_EXEC_TARGET", "deploy/nextcloud"),
        cmd,
    )
def _parse_nextcloud_mail_accounts(export_output: str) -> list[NextcloudMailAccount]:
    """Parse `occ mail:account:export` output into NextcloudMailAccount rows.

    A header line such as "Account 7:" opens an account; the first subsequent
    line containing an email address closes it, so each account yields at most
    one email.
    """
    accounts: list[NextcloudMailAccount] = []
    pending_id = ""
    for raw_line in export_output.splitlines():
        text = raw_line.strip()
        if not text:
            continue
        header = _NEXTCLOUD_ACCOUNT_RE.match(text)
        if header is not None:
            pending_id = header.group(1)
        elif pending_id and "@" in text:
            found = _EMAIL_RE.search(text)
            if found is not None:
                accounts.append(
                    NextcloudMailAccount(account_id=pending_id, email=found.group(0))
                )
                pending_id = ""
    return accounts
def _nextcloud_list_mail_accounts(username: str) -> list[NextcloudMailAccount]:
    """Export *username*'s Nextcloud Mail accounts; unknown users yield [].

    Raises RuntimeError for any occ failure other than a missing user.
    """
    occ_path = os.getenv("NEXTCLOUD_OCC_PATH", "/var/www/html/occ").strip() or "/var/www/html/occ"
    rc, out, err = _nextcloud_exec(["php", occ_path, "mail:account:export", username])
    if rc == 0:
        return _parse_nextcloud_mail_accounts(out)
    message = (err or out).strip()
    # occ wording varies by version; treat any "user missing" phrasing as empty.
    missing_markers = ("not found", "does not exist", "no such user", "unknown user")
    if any(marker in message.lower() for marker in missing_markers):
        return []
    raise RuntimeError(f"nextcloud mail export failed for {username}: {message}")
def _nextcloud_delete_mail_account(account_id: str) -> None:
    """Delete one Nextcloud Mail account by id; raise on a non-zero occ exit."""
    occ_path = os.getenv("NEXTCLOUD_OCC_PATH", "/var/www/html/occ").strip() or "/var/www/html/occ"
    rc, out, err = _nextcloud_exec(["php", occ_path, "mail:account:delete", "-q", account_id])
    if rc == 0:
        return
    message = (err or out).strip()
    raise RuntimeError(f"nextcloud mail delete failed for account {account_id}: {message}")
def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str: def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str:
data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8") data = urllib.parse.urlencode({"token": admin_token}).encode("utf-8")
req = urllib.request.Request(f"{base_url}/admin", data=data, method="POST") req = urllib.request.Request(f"{base_url}/admin", data=data, method="POST")
@ -356,6 +538,8 @@ def main() -> int:
), ),
) )
parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.") parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.")
parser.add_argument("--skip-mailu", action="store_true", help="Skip Mailu mailbox cleanup.")
parser.add_argument("--skip-nextcloud-mail", action="store_true", help="Skip Nextcloud Mail account cleanup.")
parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.") parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.")
parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.") parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.")
parser.add_argument( parser.add_argument(
@ -364,6 +548,18 @@ def main() -> int:
default=[], default=[],
help="Keycloak usernames that must never be deleted (repeatable).", help="Keycloak usernames that must never be deleted (repeatable).",
) )
parser.add_argument(
"--protect-mailu-email",
action="append",
default=[],
help="Mailu emails that must never be deleted (repeatable).",
)
parser.add_argument(
"--protect-nextcloud-username",
action="append",
default=[],
help="Nextcloud usernames that must never be touched (repeatable).",
)
parser.add_argument( parser.add_argument(
"--protect-vaultwarden-email", "--protect-vaultwarden-email",
action="append", action="append",
@ -376,7 +572,11 @@ def main() -> int:
apply = bool(args.apply) apply = bool(args.apply)
expected_confirm = ",".join(prefixes) expected_confirm = ",".join(prefixes)
protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]} protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]}
protected_mailu = {e.strip() for e in args.protect_mailu_email if e.strip()}
protected_nextcloud = {u.strip() for u in args.protect_nextcloud_username if u.strip()}
protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()} protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()}
mailu_domain = os.getenv("MAILU_DOMAIN", "bstein.dev").strip() or "bstein.dev"
mailu_db_name = os.getenv("MAILU_DB_NAME", "mailu").strip() or "mailu"
if apply and args.confirm != expected_confirm: if apply and args.confirm != expected_confirm:
raise SystemExit( raise SystemExit(
@ -388,23 +588,29 @@ def main() -> int:
print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)") print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)")
if protected_keycloak: if protected_keycloak:
print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak))) print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak)))
if protected_mailu:
print("protected mailu emails:", ", ".join(sorted(protected_mailu)))
if protected_nextcloud:
print("protected nextcloud usernames:", ", ".join(sorted(protected_nextcloud)))
if protected_vaultwarden: if protected_vaultwarden:
print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden))) print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden)))
print() print()
portal_requests: list[PortalRequestRow] = []
if not args.skip_portal_db: if not args.skip_portal_db:
portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL") portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL")
requests = _portal_list_requests(portal_db_url, prefixes) portal_requests = _portal_list_requests(portal_db_url, prefixes)
print(f"Portal DB: {len(requests)} access_requests matched") print(f"Portal DB: {len(portal_requests)} access_requests matched")
for row in requests[:50]: for row in portal_requests[:50]:
print(f" {row.request_code}\t{row.status}\t{row.username}") print(f" {row.request_code}\t{row.status}\t{row.username}")
if len(requests) > 50: if len(portal_requests) > 50:
print(f" ... and {len(requests) - 50} more") print(f" ... and {len(portal_requests) - 50} more")
if apply and requests: if apply and portal_requests:
deleted = _portal_delete_requests(portal_db_url, prefixes) deleted = _portal_delete_requests(portal_db_url, prefixes)
print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).") print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).")
print() print()
keycloak_users: list[KeycloakUser] = []
if not args.skip_keycloak: if not args.skip_keycloak:
kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/") kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/")
kc_realm = os.getenv("KEYCLOAK_REALM", "atlas") kc_realm = os.getenv("KEYCLOAK_REALM", "atlas")
@ -421,18 +627,63 @@ def main() -> int:
if user.username in protected_keycloak: if user.username in protected_keycloak:
continue continue
found[user.user_id] = user found[user.user_id] = user
users = list(found.values()) keycloak_users = list(found.values())
users.sort(key=lambda u: u.username) keycloak_users.sort(key=lambda u: u.username)
print(f"Keycloak: {len(users)} users matched") print(f"Keycloak: {len(keycloak_users)} users matched")
for user in users[:50]: for user in keycloak_users[:50]:
email = user.email or "-" email = user.email or "-"
print(f" {user.username}\t{email}\t{user.user_id}") print(f" {user.username}\t{email}\t{user.user_id}")
if len(users) > 50: if len(keycloak_users) > 50:
print(f" ... and {len(users) - 50} more") print(f" ... and {len(keycloak_users) - 50} more")
if apply and users: if apply and keycloak_users:
for user in users: for user in keycloak_users:
_keycloak_delete_user(kc_server, kc_realm, token, user.user_id) _keycloak_delete_user(kc_server, kc_realm, token, user.user_id)
print(f"Keycloak: deleted {len(users)} users.") print(f"Keycloak: deleted {len(keycloak_users)} users.")
print()
if not args.skip_mailu:
mailu_users = _mailu_list_users(prefixes, mailu_domain, mailu_db_name, protected_mailu)
print(f"Mailu: {len(mailu_users)} mailboxes matched (domain={mailu_domain})")
for user in mailu_users[:50]:
print(f" {user.email}\t{user.localpart}\t{user.domain}")
if len(mailu_users) > 50:
print(f" ... and {len(mailu_users) - 50} more")
if apply and mailu_users:
deleted = _mailu_delete_users(mailu_db_name, [u.email for u in mailu_users])
print(f"Mailu: deleted {deleted} mailboxes.")
print()
if not args.skip_nextcloud_mail:
nextcloud_usernames = {row.username for row in portal_requests if row.username}
nextcloud_usernames.update({u.username for u in keycloak_users if u.username})
nextcloud_usernames = {u for u in nextcloud_usernames if _starts_with_any(u, prefixes)}
nextcloud_usernames = {u for u in nextcloud_usernames if u not in protected_nextcloud}
matches: list[tuple[str, NextcloudMailAccount]] = []
for username in sorted(nextcloud_usernames):
accounts = _nextcloud_list_mail_accounts(username)
for account in accounts:
email = account.email.strip()
if not email:
continue
if not email.lower().endswith(f"@{mailu_domain.lower()}"):
continue
localpart = email.split("@", 1)[0]
if not _starts_with_any(localpart, prefixes):
continue
if email in protected_mailu:
continue
matches.append((username, account))
print(f"Nextcloud Mail: {len(matches)} accounts matched")
for username, account in matches[:50]:
print(f" {username}\t{account.account_id}\t{account.email}")
if len(matches) > 50:
print(f" ... and {len(matches) - 50} more")
if apply and matches:
for _, account in matches:
_nextcloud_delete_mail_account(account.account_id)
print(f"Nextcloud Mail: deleted {len(matches)} accounts.")
print() print()
if not args.skip_vaultwarden: if not args.skip_vaultwarden:

View File

@ -55,11 +55,11 @@ class _FakeResponse:
class _FakeSession: class _FakeSession:
def __init__(self, put_resp, get_resp): def __init__(self, put_resp, get_resps):
self.put_resp = put_resp self.put_resp = put_resp
self.get_resp = get_resp self.get_resps = list(get_resps)
self.put_called = False self.put_called = False
self.get_called = False self.get_calls = 0
def post(self, *args, **kwargs): def post(self, *args, **kwargs):
return _FakeResponse({"access_token": "dummy"}) return _FakeResponse({"access_token": "dummy"})
@ -69,22 +69,26 @@ class _FakeSession:
return self.put_resp return self.put_resp
def get(self, *args, **kwargs): def get(self, *args, **kwargs):
self.get_called = True self.get_calls += 1
return self.get_resp if self.get_resps:
return self.get_resps.pop(0)
return _FakeResponse({})
def test_kc_update_attributes_succeeds(monkeypatch): def test_kc_update_attributes_succeeds(monkeypatch):
sync = load_sync_module(monkeypatch) sync = load_sync_module(monkeypatch)
current_resp = _FakeResponse({"attributes": {}})
ok_resp = _FakeResponse({"attributes": {"mailu_app_password": ["abc"]}}) ok_resp = _FakeResponse({"attributes": {"mailu_app_password": ["abc"]}})
sync.SESSION = _FakeSession(_FakeResponse({}), ok_resp) sync.SESSION = _FakeSession(_FakeResponse({}), [current_resp, ok_resp])
sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"}) sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"})
assert sync.SESSION.put_called and sync.SESSION.get_called assert sync.SESSION.put_called and sync.SESSION.get_calls == 2
def test_kc_update_attributes_raises_without_attribute(monkeypatch): def test_kc_update_attributes_raises_without_attribute(monkeypatch):
sync = load_sync_module(monkeypatch) sync = load_sync_module(monkeypatch)
current_resp = _FakeResponse({"attributes": {}})
missing_attr_resp = _FakeResponse({"attributes": {}}, status=200) missing_attr_resp = _FakeResponse({"attributes": {}}, status=200)
sync.SESSION = _FakeSession(_FakeResponse({}), missing_attr_resp) sync.SESSION = _FakeSession(_FakeResponse({}), [current_resp, missing_attr_resp])
with pytest.raises(Exception): with pytest.raises(Exception):
sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"}) sync.kc_update_attributes("token", {"id": "u1", "username": "u1"}, {"mailu_app_password": "abc"})
@ -144,9 +148,25 @@ def test_main_generates_password_and_upserts(monkeypatch):
sync = load_sync_module(monkeypatch) sync = load_sync_module(monkeypatch)
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}") monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
users = [ users = [
{"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}}, {
{"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}}, "id": "u1",
{"id": "u3", "username": "user3", "email": "user3@other.com", "attributes": {}}, "username": "user1",
"email": "user1@example.com",
"attributes": {"mailu_enabled": ["true"]},
},
{
"id": "u2",
"username": "user2",
"email": "user2@example.com",
"attributes": {"mailu_app_password": ["keepme"], "mailu_enabled": ["true"]},
},
{
"id": "u3",
"username": "user3",
"email": "user3@example.com",
"attributes": {"mailu_email": ["user3@example.com"]},
},
{"id": "u4", "username": "user4", "email": "user4@other.com", "attributes": {}},
] ]
updated = [] updated = []
@ -185,6 +205,6 @@ def test_main_generates_password_and_upserts(monkeypatch):
sync.main() sync.main()
# Always backfill mailu_email, even if Keycloak recovery email is external. # Only mail-enabled users (or legacy users with a mailbox) are synced and backfilled.
assert len(updated) == 3 assert len(updated) == 3
assert conns and len(conns[0]._cursor.executions) == 3 assert conns and len(conns[0]._cursor.executions) == 3

View File

@ -42,7 +42,7 @@ spec:
claimName: ollama-models claimName: ollama-models
initContainers: initContainers:
- name: warm-model - name: warm-model
image: ollama/ollama:latest image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
env: env:
- name: OLLAMA_HOST - name: OLLAMA_HOST
value: 0.0.0.0 value: 0.0.0.0
@ -75,7 +75,7 @@ spec:
nvidia.com/gpu.shared: 1 nvidia.com/gpu.shared: 1
containers: containers:
- name: ollama - name: ollama
image: ollama/ollama:latest image: ollama/ollama@sha256:2c9595c555fd70a28363489ac03bd5bf9e7c5bdf2890373c3a830ffd7252ce6d
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
ports: ports:
- name: http - name: http

View File

@ -14,6 +14,34 @@ spec:
metadata: metadata:
labels: labels:
app: bstein-dev-home-backend app: bstein-dev-home-backend
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "bstein-dev-home"
vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
vault.hashicorp.com/agent-inject-template-portal-env.sh: |
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/mailu/mailu-initial-account-secret" }}
export SMTP_HOST="mailu-front.mailu-mailserver.svc.cluster.local"
export SMTP_PORT="587"
export SMTP_STARTTLS="true"
export SMTP_USE_TLS="false"
export SMTP_USERNAME="no-reply-portal@bstein.dev"
export SMTP_PASSWORD="{{ .Data.data.password }}"
export SMTP_FROM="no-reply-portal@bstein.dev"
{{ end }}
spec: spec:
automountServiceAccountToken: true automountServiceAccountToken: true
serviceAccountName: bstein-dev-home serviceAccountName: bstein-dev-home
@ -21,20 +49,16 @@ spec:
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
imagePullSecrets: imagePullSecrets:
- name: harbor-bstein-robot - name: harbor-regcred
containers: containers:
- name: backend - name: backend
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-95
imagePullPolicy: Always imagePullPolicy: Always
command: ["gunicorn"] command: ["/bin/sh", "-c"]
args: args:
- -b - >-
- 0.0.0.0:8080 . /vault/secrets/portal-env.sh
- --workers && exec gunicorn -b 0.0.0.0:8080 --workers 2 --timeout 180 app:app
- "2"
- --timeout
- "180"
- app:app
env: env:
- name: AI_CHAT_API - name: AI_CHAT_API
value: http://ollama.ai.svc.cluster.local:11434 value: http://ollama.ai.svc.cluster.local:11434
@ -67,18 +91,8 @@ spec:
value: atlas value: atlas
- name: KEYCLOAK_ADMIN_CLIENT_ID - name: KEYCLOAK_ADMIN_CLIENT_ID
value: bstein-dev-home-admin value: bstein-dev-home-admin
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: bstein-dev-home-keycloak-admin
key: client_secret
- name: ACCOUNT_ALLOWED_GROUPS - name: ACCOUNT_ALLOWED_GROUPS
value: "" value: ""
- name: PORTAL_DATABASE_URL
valueFrom:
secretKeyRef:
name: atlas-portal-db
key: PORTAL_DATABASE_URL
- name: HTTP_CHECK_TIMEOUT_SEC - name: HTTP_CHECK_TIMEOUT_SEC
value: "2" value: "2"
- name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT - name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT
@ -91,6 +105,22 @@ spec:
value: "60" value: "60"
- name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST - name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST
value: robotuser@bstein.dev value: robotuser@bstein.dev
- name: WGER_NAMESPACE
value: health
- name: WGER_USER_SYNC_CRONJOB
value: wger-user-sync
- name: WGER_USER_SYNC_WAIT_TIMEOUT_SEC
value: "90"
- name: FIREFLY_NAMESPACE
value: finance
- name: FIREFLY_USER_SYNC_CRONJOB
value: firefly-user-sync
- name: FIREFLY_USER_SYNC_WAIT_TIMEOUT_SEC
value: "90"
- name: VAULTWARDEN_ADMIN_SESSION_TTL_SEC
value: "900"
- name: VAULTWARDEN_ADMIN_RATE_LIMIT_BACKOFF_SEC
value: "60"
ports: ports:
- name: http - name: http
containerPort: 8080 containerPort: 8080

View File

@ -1,3 +1,4 @@
# services/bstein-dev-home/backend-service.yaml
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
metadata: metadata:

View File

@ -14,7 +14,27 @@ spec:
metadata: metadata:
labels: labels:
app: chat-ai-gateway app: chat-ai-gateway
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/role: "bstein-dev-home"
vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
vault.hashicorp.com/agent-inject-template-portal-env.sh: |
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
spec: spec:
serviceAccountName: bstein-dev-home
nodeSelector: nodeSelector:
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
@ -23,20 +43,10 @@ spec:
image: python:3.11-slim image: python:3.11-slim
command: ["/bin/sh","-c"] command: ["/bin/sh","-c"]
args: args:
- python /app/gateway.py - . /vault/secrets/portal-env.sh && exec python /app/gateway.py
env: env:
- name: UPSTREAM_URL - name: UPSTREAM_URL
value: http://bstein-dev-home-backend/api/chat value: http://bstein-dev-home-backend/api/chat
- name: CHAT_KEY_MATRIX
valueFrom:
secretKeyRef:
name: chat-ai-keys-runtime
key: matrix
- name: CHAT_KEY_HOMEPAGE
valueFrom:
secretKeyRef:
name: chat-ai-keys-runtime
key: homepage
ports: ports:
- name: http - name: http
containerPort: 8080 containerPort: 8080

View File

@ -19,10 +19,10 @@ spec:
kubernetes.io/arch: arm64 kubernetes.io/arch: arm64
node-role.kubernetes.io/worker: "true" node-role.kubernetes.io/worker: "true"
imagePullSecrets: imagePullSecrets:
- name: harbor-bstein-robot - name: harbor-regcred
containers: containers:
- name: frontend - name: frontend
image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-95
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- name: http - name: http

View File

@ -1,3 +1,4 @@
# services/bstein-dev-home/frontend-service.yaml
apiVersion: v1 apiVersion: v1
kind: Service kind: Service
metadata: metadata:

View File

@ -6,7 +6,9 @@ resources:
- namespace.yaml - namespace.yaml
- image.yaml - image.yaml
- rbac.yaml - rbac.yaml
- portal-e2e-client-secret-sync-rbac.yaml - vault-serviceaccount.yaml
- secretproviderclass.yaml
- vault-sync-deployment.yaml
- chat-ai-gateway-deployment.yaml - chat-ai-gateway-deployment.yaml
- chat-ai-gateway-service.yaml - chat-ai-gateway-service.yaml
- frontend-deployment.yaml - frontend-deployment.yaml
@ -18,9 +20,9 @@ resources:
- ingress.yaml - ingress.yaml
images: images:
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend - name: registry.bstein.dev/bstein/bstein-dev-home-frontend
newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"} newTag: 0.1.1-102 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
- name: registry.bstein.dev/bstein/bstein-dev-home-backend - name: registry.bstein.dev/bstein/bstein-dev-home-backend
newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"} newTag: 0.1.1-103 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
configMapGenerator: configMapGenerator:
- name: chat-ai-gateway - name: chat-ai-gateway
namespace: bstein-dev-home namespace: bstein-dev-home

View File

@ -1,3 +1,4 @@
# services/bstein-dev-home/namespace.yaml
apiVersion: v1 apiVersion: v1
kind: Namespace kind: Namespace
metadata: metadata:

View File

@ -2,13 +2,49 @@
apiVersion: batch/v1 apiVersion: batch/v1
kind: Job kind: Job
metadata: metadata:
name: portal-onboarding-e2e-test-11 name: portal-onboarding-e2e-test-19
namespace: bstein-dev-home namespace: bstein-dev-home
spec: spec:
backoffLimit: 0 backoffLimit: 0
template: template:
metadata:
annotations:
vault.hashicorp.com/agent-inject: "true"
vault.hashicorp.com/agent-pre-populate-only: "true"
vault.hashicorp.com/role: "bstein-dev-home"
vault.hashicorp.com/agent-inject-secret-portal-env.sh: "kv/data/atlas/portal/atlas-portal-db"
vault.hashicorp.com/agent-inject-template-portal-env.sh: |
{{ with secret "kv/data/atlas/portal/atlas-portal-db" }}
export PORTAL_DATABASE_URL="{{ .Data.data.PORTAL_DATABASE_URL }}"
{{ end }}
{{ with secret "kv/data/atlas/portal/bstein-dev-home-keycloak-admin" }}
export KEYCLOAK_ADMIN_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/chat-ai-keys-runtime" }}
export CHAT_KEY_MATRIX="{{ .Data.data.matrix }}"
export CHAT_KEY_HOMEPAGE="{{ .Data.data.homepage }}"
{{ end }}
{{ with secret "kv/data/atlas/shared/portal-e2e-client" }}
export PORTAL_E2E_CLIENT_ID="{{ .Data.data.client_id }}"
export PORTAL_E2E_CLIENT_SECRET="{{ .Data.data.client_secret }}"
{{ end }}
spec: spec:
restartPolicy: Never restartPolicy: Never
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: node-role.kubernetes.io/worker
operator: Exists
preferredDuringSchedulingIgnoredDuringExecution:
- weight: 100
preference:
matchExpressions:
- key: kubernetes.io/arch
operator: In
values: ["arm64"]
serviceAccountName: bstein-dev-home
containers: containers:
- name: test - name: test
image: python:3.11-slim image: python:3.11-slim
@ -21,21 +57,6 @@ spec:
value: atlas value: atlas
- name: KEYCLOAK_ADMIN_CLIENT_ID - name: KEYCLOAK_ADMIN_CLIENT_ID
value: bstein-dev-home-admin value: bstein-dev-home-admin
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: bstein-dev-home-keycloak-admin
key: client_secret
- name: PORTAL_E2E_CLIENT_ID
valueFrom:
secretKeyRef:
name: portal-e2e-client
key: client_id
- name: PORTAL_E2E_CLIENT_SECRET
valueFrom:
secretKeyRef:
name: portal-e2e-client
key: client_secret
- name: PORTAL_TARGET_CLIENT_ID - name: PORTAL_TARGET_CLIENT_ID
value: bstein-dev-home value: bstein-dev-home
- name: E2E_PORTAL_ADMIN_USERNAME - name: E2E_PORTAL_ADMIN_USERNAME
@ -53,7 +74,8 @@ spec:
command: ["/bin/sh", "-c"] command: ["/bin/sh", "-c"]
args: args:
- | - |
set -euo pipefail set -eu
. /vault/secrets/portal-env.sh
python /scripts/test_portal_onboarding_flow.py python /scripts/test_portal_onboarding_flow.py
volumeMounts: volumeMounts:
- name: tests - name: tests

View File

@ -106,3 +106,65 @@ subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: bstein-dev-home name: bstein-dev-home
namespace: bstein-dev-home namespace: bstein-dev-home
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: bstein-dev-home-wger-user-sync
namespace: health
rules:
- apiGroups: ["batch"]
resources: ["cronjobs"]
verbs: ["get"]
resourceNames: ["wger-user-sync"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "get", "list", "watch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: bstein-dev-home-wger-user-sync
namespace: health
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: bstein-dev-home-wger-user-sync
subjects:
- kind: ServiceAccount
name: bstein-dev-home
namespace: bstein-dev-home
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: bstein-dev-home-firefly-user-sync
namespace: finance
rules:
- apiGroups: ["batch"]
resources: ["cronjobs"]
verbs: ["get"]
resourceNames: ["firefly-user-sync"]
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "get", "list", "watch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: bstein-dev-home-firefly-user-sync
namespace: finance
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: bstein-dev-home-firefly-user-sync
subjects:
- kind: ServiceAccount
name: bstein-dev-home
namespace: bstein-dev-home

View File

@ -65,6 +65,23 @@ def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int =
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}") raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
def _wait_for_portal_ready(base_url: str, timeout_s: int = 60) -> None:
health_url = f"{base_url.rstrip('/')}/api/healthz"
deadline_at = time.monotonic() + timeout_s
last_error = None
while time.monotonic() < deadline_at:
try:
req = urllib.request.Request(health_url, method="GET")
with urllib.request.urlopen(req, timeout=10) as resp:
if resp.status == 200:
return
except Exception as exc:
last_error = str(exc)
time.sleep(2)
suffix = f" (last_error={last_error})" if last_error else ""
raise SystemExit(f"portal health check timed out{suffix}")
def _request_json( def _request_json(
method: str, method: str,
url: str, url: str,
@ -235,6 +252,7 @@ def _imap_wait_for_verify_token(
def main() -> int: def main() -> int:
portal_base = _env("PORTAL_BASE_URL").rstrip("/") portal_base = _env("PORTAL_BASE_URL").rstrip("/")
portal_ready_timeout = int(os.environ.get("E2E_PORTAL_READY_TIMEOUT_SECONDS", "60"))
keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/") keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/")
realm = _env("KEYCLOAK_REALM", "atlas") realm = _env("KEYCLOAK_REALM", "atlas")
@ -249,7 +267,7 @@ def main() -> int:
if not contact_email: if not contact_email:
raise SystemExit("E2E_CONTACT_EMAIL must not be empty") raise SystemExit("E2E_CONTACT_EMAIL must not be empty")
imap_host = os.environ.get("E2E_IMAP_HOST", "mailu-front.mailu-mailserver.svc.cluster.local").strip() imap_host = os.environ.get("E2E_IMAP_HOST", "mail.bstein.dev").strip()
imap_port = int(os.environ.get("E2E_IMAP_PORT", "993")) imap_port = int(os.environ.get("E2E_IMAP_PORT", "993"))
imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip() imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip()
imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90")) imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90"))
@ -274,6 +292,8 @@ def main() -> int:
if not mailu_password: if not mailu_password:
raise SystemExit(f"Keycloak user {imap_keycloak_username!r} missing mailu_app_password attribute") raise SystemExit(f"Keycloak user {imap_keycloak_username!r} missing mailu_app_password attribute")
_wait_for_portal_ready(portal_base, timeout_s=portal_ready_timeout)
username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user") username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user")
now = int(time.time()) now = int(time.time())
username = f"{username_prefix}-{now}" username = f"{username_prefix}-{now}"
@ -336,6 +356,8 @@ def main() -> int:
except SystemExit as exc: except SystemExit as exc:
raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}") raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}")
_wait_for_portal_ready(portal_base, timeout_s=portal_ready_timeout)
approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve" approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve"
approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180")) approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180"))
approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3")) approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3"))
@ -348,6 +370,10 @@ def main() -> int:
break break
except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc: except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc:
approve_error = str(exc) approve_error = str(exc)
try:
_wait_for_portal_ready(portal_base, timeout_s=min(30, portal_ready_timeout))
except SystemExit:
pass
if attempt == approve_attempts: if attempt == approve_attempts:
break break
time.sleep(3) time.sleep(3)

View File

@ -2,8 +2,10 @@
from __future__ import annotations from __future__ import annotations
import os
import sys import sys
import time import time
from datetime import datetime, timezone
from typing import Any, Iterable from typing import Any, Iterable
import httpx import httpx
@ -16,6 +18,8 @@ from atlas_portal.vaultwarden import invite_user
VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email" VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status" VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at" VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
VAULTWARDEN_RETRY_COOLDOWN_SEC = int(os.getenv("VAULTWARDEN_RETRY_COOLDOWN_SEC", "1800"))
VAULTWARDEN_FAILURE_BAILOUT = int(os.getenv("VAULTWARDEN_FAILURE_BAILOUT", "2"))
def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]: def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
@ -26,14 +30,22 @@ def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users" url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
first = 0 first = 0
while True: while True:
headers = client.headers() headers = _headers_with_retry(client)
# We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a # We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
# brief representation which may omit these. # brief representation which may omit these.
params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"} params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"}
with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http: payload = None
resp = http.get(url, params=params, headers=headers) for attempt in range(1, 6):
resp.raise_for_status() try:
payload = resp.json() with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
resp = http.get(url, params=params, headers=headers)
resp.raise_for_status()
payload = resp.json()
break
except httpx.HTTPError as exc:
if attempt == 5:
raise
time.sleep(attempt * 2)
if not isinstance(payload, list) or not payload: if not isinstance(payload, list) or not payload:
return return
@ -47,6 +59,19 @@ def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
first += page_size first += page_size
def _headers_with_retry(client, attempts: int = 6) -> dict[str, str]:
last_exc: Exception | None = None
for attempt in range(1, attempts + 1):
try:
return client.headers()
except Exception as exc:
last_exc = exc
time.sleep(attempt * 2)
if last_exc:
raise last_exc
raise RuntimeError("failed to fetch keycloak headers")
def _extract_attr(attrs: Any, key: str) -> str: def _extract_attr(attrs: Any, key: str) -> str:
if not isinstance(attrs, dict): if not isinstance(attrs, dict):
return "" return ""
@ -61,6 +86,21 @@ def _extract_attr(attrs: Any, key: str) -> str:
return "" return ""
def _parse_synced_at(value: str) -> float | None:
value = (value or "").strip()
if not value:
return None
for fmt in ("%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S%z"):
try:
parsed = datetime.strptime(value, fmt)
if parsed.tzinfo is None:
parsed = parsed.replace(tzinfo=timezone.utc)
return parsed.timestamp()
except ValueError:
continue
return None
def _vaultwarden_email_for_user(user: dict[str, Any]) -> str: def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
username = username.strip() username = username.strip()
@ -108,6 +148,7 @@ def main() -> int:
created = 0 created = 0
skipped = 0 skipped = 0
failures = 0 failures = 0
consecutive_failures = 0
for user in _iter_keycloak_users(): for user in _iter_keycloak_users():
username = (user.get("username") if isinstance(user.get("username"), str) else "") or "" username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
@ -137,6 +178,11 @@ def main() -> int:
current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR) current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR) current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
current_synced_ts = _parse_synced_at(current_synced_at)
if current_status in {"rate_limited", "error"} and current_synced_ts:
if time.time() - current_synced_ts < VAULTWARDEN_RETRY_COOLDOWN_SEC:
skipped += 1
continue
email = _vaultwarden_email_for_user(full_user) email = _vaultwarden_email_for_user(full_user)
if not email: if not email:
print(f"skip {username}: missing email", file=sys.stderr) print(f"skip {username}: missing email", file=sys.stderr)
@ -167,6 +213,7 @@ def main() -> int:
result = invite_user(email) result = invite_user(email)
if result.ok: if result.ok:
created += 1 created += 1
consecutive_failures = 0
print(f"ok {username}: {result.status}") print(f"ok {username}: {result.status}")
try: try:
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
@ -175,12 +222,17 @@ def main() -> int:
pass pass
else: else:
failures += 1 failures += 1
if result.status in {"rate_limited", "error"}:
consecutive_failures += 1
print(f"err {username}: {result.status} {result.detail}", file=sys.stderr) print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
try: try:
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status) _set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
_set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())) _set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
except Exception: except Exception:
pass pass
if consecutive_failures >= VAULTWARDEN_FAILURE_BAILOUT:
print("vaultwarden: too many consecutive failures; aborting run", file=sys.stderr)
break
print( print(
f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}", f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",

View File

@ -0,0 +1,21 @@
# services/bstein-dev-home/secretproviderclass.yaml
apiVersion: secrets-store.csi.x-k8s.io/v1
kind: SecretProviderClass
metadata:
name: bstein-dev-home-vault
namespace: bstein-dev-home
spec:
provider: vault
parameters:
vaultAddress: "http://vault.vault.svc.cluster.local:8200"
roleName: "bstein-dev-home"
objects: |
- objectName: "harbor-pull__dockerconfigjson"
secretPath: "kv/data/atlas/harbor-pull/bstein-dev-home"
secretKey: "dockerconfigjson"
secretObjects:
- secretName: harbor-regcred
type: kubernetes.io/dockerconfigjson
data:
- objectName: harbor-pull__dockerconfigjson
key: .dockerconfigjson

Some files were not shown because too many files have changed in this diff Show More