feature/sso-hardening #9
6
.gitignore
vendored
6
.gitignore
vendored
@ -1,2 +1,8 @@
|
|||||||
*.md
|
*.md
|
||||||
!README.md
|
!README.md
|
||||||
|
!knowledge/**/*.md
|
||||||
|
!services/comms/knowledge/**/*.md
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
.pytest_cache
|
||||||
|
.venv
|
||||||
|
|||||||
@ -5,8 +5,9 @@ resources:
|
|||||||
- ../../services/crypto
|
- ../../services/crypto
|
||||||
- ../../services/gitea
|
- ../../services/gitea
|
||||||
- ../../services/jellyfin
|
- ../../services/jellyfin
|
||||||
- ../../services/jitsi
|
- ../../services/comms
|
||||||
- ../../services/monitoring
|
- ../../services/monitoring
|
||||||
|
- ../../services/logging
|
||||||
- ../../services/pegasus
|
- ../../services/pegasus
|
||||||
- ../../services/vault
|
- ../../services/vault
|
||||||
- ../../services/bstein-dev-home
|
- ../../services/bstein-dev-home
|
||||||
|
|||||||
@ -0,0 +1,23 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/ai-llm/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: ai-llm
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/ai-llm
|
||||||
|
targetNamespace: ai
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
wait: true
|
||||||
|
healthChecks:
|
||||||
|
- apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
name: ollama
|
||||||
|
namespace: ai
|
||||||
|
dependsOn:
|
||||||
|
- name: core
|
||||||
@ -1,26 +0,0 @@
|
|||||||
# clusters/atlas/flux-system/applications/ci-demo/image-automation.yaml
|
|
||||||
apiVersion: image.toolkit.fluxcd.io/v1
|
|
||||||
kind: ImageUpdateAutomation
|
|
||||||
metadata:
|
|
||||||
name: ci-demo
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
interval: 1m0s
|
|
||||||
sourceRef:
|
|
||||||
kind: GitRepository
|
|
||||||
name: flux-system
|
|
||||||
namespace: flux-system
|
|
||||||
git:
|
|
||||||
checkout:
|
|
||||||
ref:
|
|
||||||
branch: feature/ci-gitops
|
|
||||||
commit:
|
|
||||||
author:
|
|
||||||
email: ops@bstein.dev
|
|
||||||
name: flux-bot
|
|
||||||
messageTemplate: "chore(ci-demo): apply image updates"
|
|
||||||
push:
|
|
||||||
branch: feature/ci-gitops
|
|
||||||
update:
|
|
||||||
strategy: Setters
|
|
||||||
path: services/ci-demo
|
|
||||||
@ -0,0 +1,17 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/communication/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: comms
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
path: ./services/comms
|
||||||
|
targetNamespace: comms
|
||||||
|
timeout: 2m
|
||||||
|
dependsOn:
|
||||||
|
- name: traefik
|
||||||
@ -15,5 +15,6 @@ spec:
|
|||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
|
- name: openldap
|
||||||
wait: true
|
wait: true
|
||||||
timeout: 5m
|
timeout: 5m
|
||||||
|
|||||||
@ -16,8 +16,12 @@ spec:
|
|||||||
- name: helm
|
- name: helm
|
||||||
- name: traefik
|
- name: traefik
|
||||||
healthChecks:
|
healthChecks:
|
||||||
- apiVersion: helm.toolkit.fluxcd.io/v2
|
- apiVersion: apps/v1
|
||||||
kind: HelmRelease
|
kind: Deployment
|
||||||
|
name: jenkins
|
||||||
|
namespace: jenkins
|
||||||
|
- apiVersion: v1
|
||||||
|
kind: Service
|
||||||
name: jenkins
|
name: jenkins
|
||||||
namespace: jenkins
|
namespace: jenkins
|
||||||
wait: false
|
wait: false
|
||||||
|
|||||||
@ -4,7 +4,8 @@ kind: Kustomization
|
|||||||
resources:
|
resources:
|
||||||
- gitea/kustomization.yaml
|
- gitea/kustomization.yaml
|
||||||
- vault/kustomization.yaml
|
- vault/kustomization.yaml
|
||||||
- jitsi/kustomization.yaml
|
- vaultwarden/kustomization.yaml
|
||||||
|
- comms/kustomization.yaml
|
||||||
- crypto/kustomization.yaml
|
- crypto/kustomization.yaml
|
||||||
- monerod/kustomization.yaml
|
- monerod/kustomization.yaml
|
||||||
- pegasus/kustomization.yaml
|
- pegasus/kustomization.yaml
|
||||||
@ -16,9 +17,14 @@ resources:
|
|||||||
- jellyfin/kustomization.yaml
|
- jellyfin/kustomization.yaml
|
||||||
- xmr-miner/kustomization.yaml
|
- xmr-miner/kustomization.yaml
|
||||||
- sui-metrics/kustomization.yaml
|
- sui-metrics/kustomization.yaml
|
||||||
|
- openldap/kustomization.yaml
|
||||||
- keycloak/kustomization.yaml
|
- keycloak/kustomization.yaml
|
||||||
- oauth2-proxy/kustomization.yaml
|
- oauth2-proxy/kustomization.yaml
|
||||||
- mailu/kustomization.yaml
|
- mailu/kustomization.yaml
|
||||||
- jenkins/kustomization.yaml
|
- jenkins/kustomization.yaml
|
||||||
- ci-demo/kustomization.yaml
|
- ai-llm/kustomization.yaml
|
||||||
- ci-demo/image-automation.yaml
|
- nextcloud/kustomization.yaml
|
||||||
|
- nextcloud-mail-sync/kustomization.yaml
|
||||||
|
- postgres/kustomization.yaml
|
||||||
|
- outline/kustomization.yaml
|
||||||
|
- planka/kustomization.yaml
|
||||||
|
|||||||
@ -0,0 +1,17 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/nextcloud-mail-sync/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: nextcloud-mail-sync
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
path: ./services/nextcloud-mail-sync
|
||||||
|
targetNamespace: nextcloud
|
||||||
|
timeout: 2m
|
||||||
|
dependsOn:
|
||||||
|
- name: keycloak
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/nextcloud/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1beta2
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: nextcloud
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/nextcloud
|
||||||
|
targetNamespace: nextcloud
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
wait: true
|
||||||
@ -1,18 +1,18 @@
|
|||||||
# clusters/atlas/flux-system/applications/jitsi/kustomization.yaml
|
# clusters/atlas/flux-system/applications/openldap/kustomization.yaml
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
metadata:
|
metadata:
|
||||||
name: jitsi
|
name: openldap
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
spec:
|
spec:
|
||||||
interval: 10m
|
interval: 10m
|
||||||
path: ./services/jitsi
|
|
||||||
targetNamespace: jitsi
|
|
||||||
prune: true
|
prune: true
|
||||||
sourceRef:
|
sourceRef:
|
||||||
kind: GitRepository
|
kind: GitRepository
|
||||||
name: flux-system
|
name: flux-system
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
|
path: ./services/openldap
|
||||||
|
targetNamespace: sso
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
wait: true
|
wait: true
|
||||||
@ -0,0 +1,28 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/outline/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: outline
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/outline
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
targetNamespace: outline
|
||||||
|
dependsOn:
|
||||||
|
- name: keycloak
|
||||||
|
- name: mailu
|
||||||
|
- name: traefik
|
||||||
|
healthChecks:
|
||||||
|
- apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
name: outline
|
||||||
|
namespace: outline
|
||||||
|
- apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
name: outline
|
||||||
|
namespace: outline
|
||||||
|
wait: false
|
||||||
@ -0,0 +1,28 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/planka/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: planka
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/planka
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
targetNamespace: planka
|
||||||
|
dependsOn:
|
||||||
|
- name: keycloak
|
||||||
|
- name: mailu
|
||||||
|
- name: traefik
|
||||||
|
healthChecks:
|
||||||
|
- apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
name: planka
|
||||||
|
namespace: planka
|
||||||
|
- apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
name: planka
|
||||||
|
namespace: planka
|
||||||
|
wait: false
|
||||||
@ -0,0 +1,24 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/postgres/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: postgres
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/postgres
|
||||||
|
prune: true
|
||||||
|
force: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
targetNamespace: postgres
|
||||||
|
dependsOn:
|
||||||
|
- name: vault
|
||||||
|
- name: vault-csi
|
||||||
|
healthChecks:
|
||||||
|
- apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
name: postgres
|
||||||
|
namespace: postgres
|
||||||
|
wait: true
|
||||||
@ -0,0 +1,20 @@
|
|||||||
|
# clusters/atlas/flux-system/applications/vaultwarden/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: vaultwarden
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
suspend: false
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
path: ./services/vaultwarden
|
||||||
|
targetNamespace: vaultwarden
|
||||||
|
prune: true
|
||||||
|
wait: true
|
||||||
|
dependsOn:
|
||||||
|
- name: helm
|
||||||
|
- name: traefik
|
||||||
@ -8,7 +8,7 @@ metadata:
|
|||||||
spec:
|
spec:
|
||||||
interval: 1m0s
|
interval: 1m0s
|
||||||
ref:
|
ref:
|
||||||
branch: main
|
branch: feature/sso-hardening
|
||||||
secretRef:
|
secretRef:
|
||||||
name: flux-system-gitea
|
name: flux-system-gitea
|
||||||
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
url: ssh://git@scm.bstein.dev:2242/bstein/titan-iac.git
|
||||||
|
|||||||
@ -4,7 +4,11 @@ kind: Kustomization
|
|||||||
resources:
|
resources:
|
||||||
- core/kustomization.yaml
|
- core/kustomization.yaml
|
||||||
- helm/kustomization.yaml
|
- helm/kustomization.yaml
|
||||||
|
- metallb/kustomization.yaml
|
||||||
- traefik/kustomization.yaml
|
- traefik/kustomization.yaml
|
||||||
- gitops-ui/kustomization.yaml
|
- gitops-ui/kustomization.yaml
|
||||||
- monitoring/kustomization.yaml
|
- monitoring/kustomization.yaml
|
||||||
|
- logging/kustomization.yaml
|
||||||
|
- maintenance/kustomization.yaml
|
||||||
- longhorn-ui/kustomization.yaml
|
- longhorn-ui/kustomization.yaml
|
||||||
|
- ../platform/vault-csi/kustomization.yaml
|
||||||
|
|||||||
@ -0,0 +1,14 @@
|
|||||||
|
# clusters/atlas/flux-system/platform/logging/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: logging
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 10m
|
||||||
|
path: ./services/logging
|
||||||
|
prune: true
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
wait: false
|
||||||
@ -1,17 +1,14 @@
|
|||||||
# clusters/atlas/flux-system/applications/ci-demo/kustomization.yaml
|
# clusters/atlas/flux-system/platform/maintenance/kustomization.yaml
|
||||||
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
metadata:
|
metadata:
|
||||||
name: ci-demo
|
name: maintenance
|
||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
spec:
|
spec:
|
||||||
interval: 10m
|
interval: 10m
|
||||||
path: ./services/ci-demo
|
path: ./services/maintenance
|
||||||
prune: true
|
prune: true
|
||||||
sourceRef:
|
sourceRef:
|
||||||
kind: GitRepository
|
kind: GitRepository
|
||||||
name: flux-system
|
name: flux-system
|
||||||
namespace: flux-system
|
|
||||||
dependsOn:
|
|
||||||
- name: core
|
|
||||||
wait: false
|
wait: false
|
||||||
@ -0,0 +1,16 @@
|
|||||||
|
# clusters/atlas/flux-system/platform/metallb/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: metallb
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 30m
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
path: ./infrastructure/metallb
|
||||||
|
prune: true
|
||||||
|
wait: true
|
||||||
|
targetNamespace: metallb-system
|
||||||
@ -15,4 +15,5 @@ spec:
|
|||||||
namespace: flux-system
|
namespace: flux-system
|
||||||
dependsOn:
|
dependsOn:
|
||||||
- name: core
|
- name: core
|
||||||
|
- name: metallb
|
||||||
wait: true
|
wait: true
|
||||||
|
|||||||
@ -0,0 +1,16 @@
|
|||||||
|
# clusters/atlas/flux-system/platform/vault-csi/kustomization.yaml
|
||||||
|
apiVersion: kustomize.toolkit.fluxcd.io/v1
|
||||||
|
kind: Kustomization
|
||||||
|
metadata:
|
||||||
|
name: vault-csi
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 30m
|
||||||
|
sourceRef:
|
||||||
|
kind: GitRepository
|
||||||
|
name: flux-system
|
||||||
|
namespace: flux-system
|
||||||
|
path: ./infrastructure/vault-csi
|
||||||
|
prune: true
|
||||||
|
wait: true
|
||||||
|
targetNamespace: kube-system
|
||||||
@ -5,3 +5,4 @@ resources:
|
|||||||
- ../../../infrastructure/modules/base
|
- ../../../infrastructure/modules/base
|
||||||
- ../../../infrastructure/modules/profiles/atlas-ha
|
- ../../../infrastructure/modules/profiles/atlas-ha
|
||||||
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
|
- ../../../infrastructure/sources/cert-manager/letsencrypt.yaml
|
||||||
|
- ../../../infrastructure/metallb
|
||||||
|
|||||||
16
dockerfiles/Dockerfile.data-prepper
Normal file
16
dockerfiles/Dockerfile.data-prepper
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
FROM --platform=$BUILDPLATFORM opensearchproject/data-prepper:2.8.0 AS source
|
||||||
|
|
||||||
|
FROM --platform=$TARGETPLATFORM eclipse-temurin:17-jre
|
||||||
|
|
||||||
|
ENV DATA_PREPPER_PATH=/usr/share/data-prepper
|
||||||
|
|
||||||
|
RUN useradd -u 10001 -M -U -d / -s /usr/sbin/nologin data_prepper \
|
||||||
|
&& mkdir -p /var/log/data-prepper
|
||||||
|
|
||||||
|
COPY --from=source /usr/share/data-prepper /usr/share/data-prepper
|
||||||
|
|
||||||
|
RUN chown -R 10001:10001 /usr/share/data-prepper /var/log/data-prepper
|
||||||
|
|
||||||
|
USER 10001
|
||||||
|
WORKDIR /usr/share/data-prepper
|
||||||
|
CMD ["bin/data-prepper"]
|
||||||
@ -1,5 +1,18 @@
|
|||||||
# hosts/roles/titan_jh/tasks/main.yaml
|
# hosts/roles/titan_jh/tasks/main.yaml
|
||||||
---
|
---
|
||||||
|
- name: Install node exporter
|
||||||
|
ansible.builtin.package:
|
||||||
|
name: prometheus-node-exporter
|
||||||
|
state: present
|
||||||
|
tags: ['jumphost', 'monitoring']
|
||||||
|
|
||||||
|
- name: Enable node exporter
|
||||||
|
ansible.builtin.service:
|
||||||
|
name: prometheus-node-exporter
|
||||||
|
enabled: true
|
||||||
|
state: started
|
||||||
|
tags: ['jumphost', 'monitoring']
|
||||||
|
|
||||||
- name: Placeholder for jumphost hardening
|
- name: Placeholder for jumphost hardening
|
||||||
ansible.builtin.debug:
|
ansible.builtin.debug:
|
||||||
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
|
msg: "Harden SSH, manage bastion tooling, and configure audit logging here."
|
||||||
|
|||||||
20
infrastructure/metallb/ippool.yaml
Normal file
20
infrastructure/metallb/ippool.yaml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# infrastructure/metallb/ippool.yaml
|
||||||
|
apiVersion: metallb.io/v1beta1
|
||||||
|
kind: IPAddressPool
|
||||||
|
metadata:
|
||||||
|
name: communication-pool
|
||||||
|
namespace: metallb-system
|
||||||
|
spec:
|
||||||
|
addresses:
|
||||||
|
- 192.168.22.4-192.168.22.6
|
||||||
|
- 192.168.22.9-192.168.22.9
|
||||||
|
autoAssign: true
|
||||||
|
---
|
||||||
|
apiVersion: metallb.io/v1beta1
|
||||||
|
kind: L2Advertisement
|
||||||
|
metadata:
|
||||||
|
name: communication-adv
|
||||||
|
namespace: metallb-system
|
||||||
|
spec:
|
||||||
|
ipAddressPools:
|
||||||
|
- communication-pool
|
||||||
10
infrastructure/metallb/kustomization.yaml
Normal file
10
infrastructure/metallb/kustomization.yaml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# infrastructure/metallb/kustomization.yaml
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- namespace.yaml
|
||||||
|
- metallb-rendered.yaml
|
||||||
|
- ippool.yaml
|
||||||
|
patchesStrategicMerge:
|
||||||
|
- patches/node-placement.yaml
|
||||||
|
- patches/speaker-loglevel.yaml
|
||||||
2411
infrastructure/metallb/metallb-rendered.yaml
Normal file
2411
infrastructure/metallb/metallb-rendered.yaml
Normal file
File diff suppressed because it is too large
Load Diff
5
infrastructure/metallb/namespace.yaml
Normal file
5
infrastructure/metallb/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# infrastructure/metallb/namespace.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: metallb-system
|
||||||
27
infrastructure/metallb/patches/node-placement.yaml
Normal file
27
infrastructure/metallb/patches/node-placement.yaml
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# infrastructure/metallb/patches/node-placement.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: metallb-controller
|
||||||
|
namespace: metallb-system
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: controller
|
||||||
|
args:
|
||||||
|
- --port=7472
|
||||||
|
- --log-level=info
|
||||||
|
- --webhook-mode=enabled
|
||||||
|
- --tls-min-version=VersionTLS12
|
||||||
|
- --lb-class=metallb
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
- matchExpressions:
|
||||||
|
- key: hardware
|
||||||
|
operator: In
|
||||||
|
values:
|
||||||
|
- rpi4
|
||||||
|
- rpi5
|
||||||
15
infrastructure/metallb/patches/speaker-loglevel.yaml
Normal file
15
infrastructure/metallb/patches/speaker-loglevel.yaml
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
# infrastructure/metallb/patches/speaker-loglevel.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: metallb-speaker
|
||||||
|
namespace: metallb-system
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: speaker
|
||||||
|
args:
|
||||||
|
- --port=7472
|
||||||
|
- --log-level=info
|
||||||
|
- --lb-class=metallb
|
||||||
@ -2,6 +2,7 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
|
- ../components/device-plugin-config
|
||||||
- ../components/device-plugin-jetson
|
- ../components/device-plugin-jetson
|
||||||
- ../components/device-plugin-minipc
|
- ../components/device-plugin-minipc
|
||||||
- ../components/device-plugin-tethys
|
- ../components/device-plugin-tethys
|
||||||
|
|||||||
@ -0,0 +1,15 @@
|
|||||||
|
# infrastructure/modules/profiles/components/device-plugin-config/configmap.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
namespace: kube-system
|
||||||
|
data:
|
||||||
|
config.yaml: |
|
||||||
|
version: v1
|
||||||
|
sharing:
|
||||||
|
timeSlicing:
|
||||||
|
renameByDefault: true
|
||||||
|
resources:
|
||||||
|
- name: nvidia.com/gpu
|
||||||
|
replicas: 4
|
||||||
@ -0,0 +1,5 @@
|
|||||||
|
# infrastructure/modules/profiles/components/device-plugin-config/kustomization.yaml
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- configmap.yaml
|
||||||
@ -30,7 +30,8 @@ spec:
|
|||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
args:
|
args:
|
||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar,cdi"
|
- "--device-list-strategy=envvar"
|
||||||
|
- "--config-file=/config/config.yaml"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -41,7 +42,12 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
mountPath: /config
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
|||||||
@ -32,6 +32,7 @@ spec:
|
|||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar"
|
- "--device-list-strategy=envvar"
|
||||||
- "--mig-strategy=none"
|
- "--mig-strategy=none"
|
||||||
|
- "--config-file=/config/config.yaml"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -42,7 +43,12 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
mountPath: /config
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
|||||||
@ -33,6 +33,7 @@ spec:
|
|||||||
- "--fail-on-init-error=false"
|
- "--fail-on-init-error=false"
|
||||||
- "--device-list-strategy=envvar"
|
- "--device-list-strategy=envvar"
|
||||||
- "--mig-strategy=none"
|
- "--mig-strategy=none"
|
||||||
|
- "--config-file=/config/config.yaml"
|
||||||
securityContext:
|
securityContext:
|
||||||
privileged: true
|
privileged: true
|
||||||
env:
|
env:
|
||||||
@ -43,7 +44,12 @@ spec:
|
|||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
mountPath: /var/lib/kubelet/device-plugins
|
mountPath: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
mountPath: /config
|
||||||
volumes:
|
volumes:
|
||||||
- name: device-plugin
|
- name: device-plugin
|
||||||
hostPath:
|
hostPath:
|
||||||
path: /var/lib/kubelet/device-plugins
|
path: /var/lib/kubelet/device-plugins
|
||||||
|
- name: config
|
||||||
|
configMap:
|
||||||
|
name: nvidia-device-plugin-config
|
||||||
|
|||||||
@ -2,4 +2,5 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
|
- ../components/device-plugin-config
|
||||||
- ../components/device-plugin-tethys
|
- ../components/device-plugin-tethys
|
||||||
|
|||||||
9
infrastructure/sources/helm/fluent-bit.yaml
Normal file
9
infrastructure/sources/helm/fluent-bit.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# infrastructure/sources/helm/fluent-bit.yaml
|
||||||
|
apiVersion: source.toolkit.fluxcd.io/v1
|
||||||
|
kind: HelmRepository
|
||||||
|
metadata:
|
||||||
|
name: fluent
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 1h
|
||||||
|
url: https://fluent.github.io/helm-charts
|
||||||
@ -2,11 +2,15 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
kind: Kustomization
|
kind: Kustomization
|
||||||
resources:
|
resources:
|
||||||
|
- fluent-bit.yaml
|
||||||
- grafana.yaml
|
- grafana.yaml
|
||||||
- hashicorp.yaml
|
- hashicorp.yaml
|
||||||
- jetstack.yaml
|
- jetstack.yaml
|
||||||
- jenkins.yaml
|
- jenkins.yaml
|
||||||
- mailu.yaml
|
- mailu.yaml
|
||||||
|
- opentelemetry.yaml
|
||||||
|
- opensearch.yaml
|
||||||
- harbor.yaml
|
- harbor.yaml
|
||||||
- prometheus.yaml
|
- prometheus.yaml
|
||||||
- victoria-metrics.yaml
|
- victoria-metrics.yaml
|
||||||
|
- secrets-store-csi.yaml
|
||||||
|
|||||||
9
infrastructure/sources/helm/opensearch.yaml
Normal file
9
infrastructure/sources/helm/opensearch.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# infrastructure/sources/helm/opensearch.yaml
|
||||||
|
apiVersion: source.toolkit.fluxcd.io/v1
|
||||||
|
kind: HelmRepository
|
||||||
|
metadata:
|
||||||
|
name: opensearch
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 1h
|
||||||
|
url: https://opensearch-project.github.io/helm-charts
|
||||||
9
infrastructure/sources/helm/opentelemetry.yaml
Normal file
9
infrastructure/sources/helm/opentelemetry.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# infrastructure/sources/helm/opentelemetry.yaml
|
||||||
|
apiVersion: source.toolkit.fluxcd.io/v1
|
||||||
|
kind: HelmRepository
|
||||||
|
metadata:
|
||||||
|
name: opentelemetry
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 1h
|
||||||
|
url: https://open-telemetry.github.io/opentelemetry-helm-charts
|
||||||
9
infrastructure/sources/helm/secrets-store-csi.yaml
Normal file
9
infrastructure/sources/helm/secrets-store-csi.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# infrastructure/sources/helm/secrets-store-csi.yaml
|
||||||
|
apiVersion: source.toolkit.fluxcd.io/v1
|
||||||
|
kind: HelmRepository
|
||||||
|
metadata:
|
||||||
|
name: secrets-store-csi-driver
|
||||||
|
namespace: flux-system
|
||||||
|
spec:
|
||||||
|
interval: 1h
|
||||||
|
url: https://kubernetes-sigs.github.io/secrets-store-csi-driver/charts
|
||||||
@ -71,9 +71,10 @@ rules:
|
|||||||
- tlsoptions
|
- tlsoptions
|
||||||
- tlsstores
|
- tlsstores
|
||||||
- serverstransports
|
- serverstransports
|
||||||
|
- serverstransporttcps
|
||||||
- traefikservices
|
- traefikservices
|
||||||
|
- middlewaretcps
|
||||||
verbs:
|
verbs:
|
||||||
- get
|
- get
|
||||||
- list
|
- list
|
||||||
- watch
|
- watch
|
||||||
|
|
||||||
|
|||||||
@ -10,3 +10,4 @@ resources:
|
|||||||
- clusterrole.yaml
|
- clusterrole.yaml
|
||||||
- clusterrolebinding.yaml
|
- clusterrolebinding.yaml
|
||||||
- service.yaml
|
- service.yaml
|
||||||
|
- traefik-service-lb.yaml
|
||||||
|
|||||||
24
infrastructure/traefik/traefik-service-lb.yaml
Normal file
24
infrastructure/traefik/traefik-service-lb.yaml
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# infrastructure/traefik/traefik-service-lb.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: traefik
|
||||||
|
namespace: kube-system
|
||||||
|
annotations:
|
||||||
|
metallb.universe.tf/address-pool: communication-pool
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
loadBalancerClass: metallb
|
||||||
|
loadBalancerIP: 192.168.22.9
|
||||||
|
ports:
|
||||||
|
- name: web
|
||||||
|
port: 80
|
||||||
|
targetPort: web
|
||||||
|
protocol: TCP
|
||||||
|
- name: websecure
|
||||||
|
port: 443
|
||||||
|
targetPort: websecure
|
||||||
|
protocol: TCP
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/instance: traefik-kube-system
|
||||||
|
app.kubernetes.io/name: traefik
|
||||||
6
infrastructure/vault-csi/kustomization.yaml
Normal file
6
infrastructure/vault-csi/kustomization.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# infrastructure/vault-csi/kustomization.yaml
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
resources:
|
||||||
|
- secrets-store-csi-driver.yaml
|
||||||
|
- vault-csi-provider.yaml
|
||||||
20
infrastructure/vault-csi/secrets-store-csi-driver.yaml
Normal file
20
infrastructure/vault-csi/secrets-store-csi-driver.yaml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# infrastructure/vault-csi/secrets-store-csi-driver.yaml
|
||||||
|
apiVersion: helm.toolkit.fluxcd.io/v2
|
||||||
|
kind: HelmRelease
|
||||||
|
metadata:
|
||||||
|
name: secrets-store-csi-driver
|
||||||
|
namespace: kube-system
|
||||||
|
spec:
|
||||||
|
interval: 15m
|
||||||
|
chart:
|
||||||
|
spec:
|
||||||
|
chart: secrets-store-csi-driver
|
||||||
|
version: "~1.3.0"
|
||||||
|
sourceRef:
|
||||||
|
kind: HelmRepository
|
||||||
|
name: secrets-store-csi-driver
|
||||||
|
namespace: flux-system
|
||||||
|
values:
|
||||||
|
syncSecret:
|
||||||
|
enabled: true
|
||||||
|
enableSecretRotation: false
|
||||||
111
infrastructure/vault-csi/vault-csi-provider.yaml
Normal file
111
infrastructure/vault-csi/vault-csi-provider.yaml
Normal file
@ -0,0 +1,111 @@
|
|||||||
|
# infrastructure/vault-csi/vault-csi-provider.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider-clusterrole
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["serviceaccounts/token"]
|
||||||
|
verbs: ["create"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider-clusterrolebinding
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: vault-csi-provider-clusterrole
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: vault-csi-provider
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider-role
|
||||||
|
namespace: kube-system
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["secrets"]
|
||||||
|
verbs: ["get"]
|
||||||
|
resourceNames: ["vault-csi-provider-hmac-key"]
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["secrets"]
|
||||||
|
verbs: ["create"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider-rolebinding
|
||||||
|
namespace: kube-system
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: vault-csi-provider-role
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: vault-csi-provider
|
||||||
|
namespace: kube-system
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: DaemonSet
|
||||||
|
metadata:
|
||||||
|
name: vault-csi-provider
|
||||||
|
namespace: kube-system
|
||||||
|
labels: { app.kubernetes.io/name: vault-csi-provider }
|
||||||
|
spec:
|
||||||
|
updateStrategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
selector:
|
||||||
|
matchLabels: { app.kubernetes.io/name: vault-csi-provider }
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels: { app.kubernetes.io/name: vault-csi-provider }
|
||||||
|
spec:
|
||||||
|
serviceAccountName: vault-csi-provider
|
||||||
|
containers:
|
||||||
|
- name: provider-vault-installer
|
||||||
|
image: hashicorp/vault-csi-provider:1.7.0
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
args:
|
||||||
|
- -endpoint=/provider/vault.sock
|
||||||
|
- -log-level=info
|
||||||
|
resources:
|
||||||
|
requests: { cpu: 50m, memory: 100Mi }
|
||||||
|
limits: { cpu: 50m, memory: 100Mi }
|
||||||
|
volumeMounts:
|
||||||
|
- { name: providervol, mountPath: "/provider" }
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: "/health/ready"
|
||||||
|
port: 8080
|
||||||
|
scheme: "HTTP"
|
||||||
|
failureThreshold: 2
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 3
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: "/health/ready"
|
||||||
|
port: 8080
|
||||||
|
scheme: "HTTP"
|
||||||
|
failureThreshold: 2
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
|
successThreshold: 1
|
||||||
|
timeoutSeconds: 3
|
||||||
|
volumes:
|
||||||
|
- name: providervol
|
||||||
|
hostPath:
|
||||||
|
path: "/var/run/secrets-store-csi-providers"
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/os: linux
|
||||||
22
knowledge/INDEX.md
Normal file
22
knowledge/INDEX.md
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
Atlas Knowledge Base (KB)
|
||||||
|
|
||||||
|
This folder is the source-of-truth “memory” for Atlas/Titan assistants (and for humans). It is designed to be:
|
||||||
|
- Accurate (grounded in GitOps + read-only cluster tools)
|
||||||
|
- Maintainable (small docs + deterministic generators)
|
||||||
|
- Safe (no secrets; refer to Secret/Vault paths by name only)
|
||||||
|
|
||||||
|
Layout
|
||||||
|
- `knowledge/runbooks/`: human-written docs (short, chunkable Markdown).
|
||||||
|
- `knowledge/catalog/`: generated machine-readable facts (YAML/JSON).
|
||||||
|
- `knowledge/diagrams/`: generated Mermaid diagrams (`.mmd`) derived from the catalog.
|
||||||
|
|
||||||
|
Regeneration
|
||||||
|
- Update manifests/docs, then regenerate generated artifacts:
|
||||||
|
- `python scripts/knowledge_render_atlas.py --write`
|
||||||
|
|
||||||
|
Authoring rules
|
||||||
|
- Never include secret values. Prefer `secretRef` names or Vault paths like `kv/atlas/...`.
|
||||||
|
- Prefer stable identifiers: Kubernetes `namespace/name`, DNS hostnames, Flux kustomization paths.
|
||||||
|
- Keep each runbook small; one topic per file; use headings.
|
||||||
|
- When in doubt, link to the exact file path in this repo that configures the behavior.
|
||||||
|
|
||||||
8
knowledge/catalog/atlas-summary.json
Normal file
8
knowledge/catalog/atlas-summary.json
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"counts": {
|
||||||
|
"helmrelease_host_hints": 7,
|
||||||
|
"http_endpoints": 35,
|
||||||
|
"services": 44,
|
||||||
|
"workloads": 49
|
||||||
|
}
|
||||||
|
}
|
||||||
2771
knowledge/catalog/atlas.json
Normal file
2771
knowledge/catalog/atlas.json
Normal file
File diff suppressed because it is too large
Load Diff
1786
knowledge/catalog/atlas.yaml
Normal file
1786
knowledge/catalog/atlas.yaml
Normal file
File diff suppressed because it is too large
Load Diff
89
knowledge/catalog/runbooks.json
Normal file
89
knowledge/catalog/runbooks.json
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"path": "runbooks/ci-gitea-jenkins.md",
|
||||||
|
"title": "CI: Gitea \u2192 Jenkins pipeline",
|
||||||
|
"tags": [
|
||||||
|
"atlas",
|
||||||
|
"ci",
|
||||||
|
"gitea",
|
||||||
|
"jenkins"
|
||||||
|
],
|
||||||
|
"entrypoints": [
|
||||||
|
"scm.bstein.dev",
|
||||||
|
"ci.bstein.dev"
|
||||||
|
],
|
||||||
|
"source_paths": [
|
||||||
|
"services/gitea",
|
||||||
|
"services/jenkins",
|
||||||
|
"scripts/jenkins_cred_sync.sh",
|
||||||
|
"scripts/gitea_cred_sync.sh"
|
||||||
|
],
|
||||||
|
"body": "# CI: Gitea \u2192 Jenkins pipeline\n\n## What this is\nAtlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).\n\n## Where it is configured\n- Gitea manifests: `services/gitea/`\n- Jenkins manifests: `services/jenkins/`\n- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`\n\n## What users do (typical flow)\n- Create a repo in Gitea.\n- Create/update a Jenkins job/pipeline that can fetch the repo.\n- Configure a webhook (or SCM polling) so pushes trigger builds.\n\n## Troubleshooting (common)\n- \u201cWebhook not firing\u201d: confirm ingress host, webhook URL, and Jenkins job is reachable.\n- \u201cAuth denied cloning\u201d: confirm Keycloak group membership and that Jenkins has a valid token/credential configured."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "runbooks/comms-verify.md",
|
||||||
|
"title": "Othrys verification checklist",
|
||||||
|
"tags": [
|
||||||
|
"comms",
|
||||||
|
"matrix",
|
||||||
|
"element",
|
||||||
|
"livekit"
|
||||||
|
],
|
||||||
|
"entrypoints": [
|
||||||
|
"https://live.bstein.dev",
|
||||||
|
"https://matrix.live.bstein.dev"
|
||||||
|
],
|
||||||
|
"source_paths": [],
|
||||||
|
"body": "1) Guest join:\n- Open a private window and visit:\n `https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join`\n- Confirm the guest join flow works and the displayname becomes `<word>-<word>`.\n\n2) Keycloak login:\n- Log in from `https://live.bstein.dev` and confirm MAS -> Keycloak -> Element redirect.\n\n3) Video rooms:\n- Start an Element Call room and confirm audio/video with a second account.\n- Check that guests can read public rooms but cannot start calls.\n\n4) Well-known:\n- `https://live.bstein.dev/.well-known/matrix/client` returns JSON.\n- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON.\n\n5) TURN reachability:\n- Confirm `turn.live.bstein.dev:3478` and `turns:5349` are reachable from WAN."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "runbooks/kb-authoring.md",
|
||||||
|
"title": "KB authoring: what to write (and what not to)",
|
||||||
|
"tags": [
|
||||||
|
"atlas",
|
||||||
|
"kb",
|
||||||
|
"runbooks"
|
||||||
|
],
|
||||||
|
"entrypoints": [],
|
||||||
|
"source_paths": [
|
||||||
|
"knowledge/runbooks",
|
||||||
|
"scripts/knowledge_render_atlas.py"
|
||||||
|
],
|
||||||
|
"body": "# KB authoring: what to write (and what not to)\n\n## The goal\nGive Atlas assistants enough grounded, Atlas-specific context to answer \u201chow do I\u2026?\u201d questions without guessing.\n\n## What to capture (high value)\n- User workflows: \u201cclick here, set X, expected result\u201d\n- Operator workflows: \u201cedit these files, reconcile this kustomization, verify with these commands\u201d\n- Wiring: \u201cthis host routes to this service; this service depends on Postgres/Vault/etc\u201d\n- Failure modes: exact error messages + the 2\u20135 checks that usually resolve them\n- Permissions: Keycloak groups/roles and what they unlock\n\n## What to avoid (low value / fluff)\n- Generic Kubernetes explanations (link to upstream docs instead)\n- Copy-pasting large manifests (prefer file paths + small snippets)\n- Anything that will drift quickly (render it from GitOps instead)\n- Any secret values (reference Secret/Vault locations by name only)\n\n## Document pattern (recommended)\nEach runbook should answer:\n- \u201cWhat is this?\u201d\n- \u201cWhat do users do?\u201d\n- \u201cWhat do operators change (where in Git)?\u201d\n- \u201cHow do we verify it works?\u201d\n- \u201cWhat breaks and how to debug it?\u201d"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "runbooks/observability.md",
|
||||||
|
"title": "Observability: Grafana + VictoriaMetrics (how to query safely)",
|
||||||
|
"tags": [
|
||||||
|
"atlas",
|
||||||
|
"monitoring",
|
||||||
|
"grafana",
|
||||||
|
"victoriametrics"
|
||||||
|
],
|
||||||
|
"entrypoints": [
|
||||||
|
"metrics.bstein.dev",
|
||||||
|
"alerts.bstein.dev"
|
||||||
|
],
|
||||||
|
"source_paths": [
|
||||||
|
"services/monitoring"
|
||||||
|
],
|
||||||
|
"body": "# Observability: Grafana + VictoriaMetrics (how to query safely)\n\n## Where it is configured\n- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values)\n- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)\n\n## Using metrics as a \u201ctool\u201d for Atlas assistants\nThe safest pattern is: map a small set of intents \u2192 fixed PromQL queries, then summarize results.\n\nExamples (intents)\n- \u201cIs the cluster healthy?\u201d \u2192 node readiness + pod restart rate\n- \u201cWhy is Element Call failing?\u201d \u2192 LiveKit/coturn pod restarts + synapse errors + ingress 5xx\n- \u201cIs Jenkins slow?\u201d \u2192 pod CPU/memory + HTTP latency metrics (if exported)\n\n## Why dashboards are not the KB\nDashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the\nKB focused on wiring, runbooks, and stable conventions."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "runbooks/template.md",
|
||||||
|
"title": "<short title>",
|
||||||
|
"tags": [
|
||||||
|
"atlas",
|
||||||
|
"<service>",
|
||||||
|
"<topic>"
|
||||||
|
],
|
||||||
|
"entrypoints": [
|
||||||
|
"<hostnames if relevant>"
|
||||||
|
],
|
||||||
|
"source_paths": [
|
||||||
|
"services/<svc>",
|
||||||
|
"clusters/atlas/<...>"
|
||||||
|
],
|
||||||
|
"body": "# <Short title>\n\n## What this is\n\n## For users (how to)\n\n## For operators (where configured)\n\n## Troubleshooting (symptoms \u2192 checks)"
|
||||||
|
}
|
||||||
|
]
|
||||||
189
knowledge/diagrams/atlas-http.mmd
Normal file
189
knowledge/diagrams/atlas-http.mmd
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
flowchart LR
|
||||||
|
host_auth_bstein_dev["auth.bstein.dev"]
|
||||||
|
svc_sso_oauth2_proxy["sso/oauth2-proxy (Service)"]
|
||||||
|
host_auth_bstein_dev --> svc_sso_oauth2_proxy
|
||||||
|
wl_sso_oauth2_proxy["sso/oauth2-proxy (Deployment)"]
|
||||||
|
svc_sso_oauth2_proxy --> wl_sso_oauth2_proxy
|
||||||
|
host_bstein_dev["bstein.dev"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Service)"]
|
||||||
|
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_frontend["bstein-dev-home/bstein-dev-home-frontend (Deployment)"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend --> wl_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
svc_comms_matrix_wellknown["comms/matrix-wellknown (Service)"]
|
||||||
|
host_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
wl_comms_matrix_wellknown["comms/matrix-wellknown (Deployment)"]
|
||||||
|
svc_comms_matrix_wellknown --> wl_comms_matrix_wellknown
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Service)"]
|
||||||
|
host_bstein_dev --> svc_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_backend["bstein-dev-home/bstein-dev-home-backend (Deployment)"]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend --> wl_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
host_call_live_bstein_dev["call.live.bstein.dev"]
|
||||||
|
svc_comms_element_call["comms/element-call (Service)"]
|
||||||
|
host_call_live_bstein_dev --> svc_comms_element_call
|
||||||
|
wl_comms_element_call["comms/element-call (Deployment)"]
|
||||||
|
svc_comms_element_call --> wl_comms_element_call
|
||||||
|
host_chat_ai_bstein_dev["chat.ai.bstein.dev"]
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Service)"]
|
||||||
|
host_chat_ai_bstein_dev --> svc_bstein_dev_home_chat_ai_gateway
|
||||||
|
wl_bstein_dev_home_chat_ai_gateway["bstein-dev-home/chat-ai-gateway (Deployment)"]
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway --> wl_bstein_dev_home_chat_ai_gateway
|
||||||
|
host_ci_bstein_dev["ci.bstein.dev"]
|
||||||
|
svc_jenkins_jenkins["jenkins/jenkins (Service)"]
|
||||||
|
host_ci_bstein_dev --> svc_jenkins_jenkins
|
||||||
|
wl_jenkins_jenkins["jenkins/jenkins (Deployment)"]
|
||||||
|
svc_jenkins_jenkins --> wl_jenkins_jenkins
|
||||||
|
host_cloud_bstein_dev["cloud.bstein.dev"]
|
||||||
|
svc_nextcloud_nextcloud["nextcloud/nextcloud (Service)"]
|
||||||
|
host_cloud_bstein_dev --> svc_nextcloud_nextcloud
|
||||||
|
wl_nextcloud_nextcloud["nextcloud/nextcloud (Deployment)"]
|
||||||
|
svc_nextcloud_nextcloud --> wl_nextcloud_nextcloud
|
||||||
|
host_kit_live_bstein_dev["kit.live.bstein.dev"]
|
||||||
|
svc_comms_livekit_token_service["comms/livekit-token-service (Service)"]
|
||||||
|
host_kit_live_bstein_dev --> svc_comms_livekit_token_service
|
||||||
|
wl_comms_livekit_token_service["comms/livekit-token-service (Deployment)"]
|
||||||
|
svc_comms_livekit_token_service --> wl_comms_livekit_token_service
|
||||||
|
svc_comms_livekit["comms/livekit (Service)"]
|
||||||
|
host_kit_live_bstein_dev --> svc_comms_livekit
|
||||||
|
wl_comms_livekit["comms/livekit (Deployment)"]
|
||||||
|
svc_comms_livekit --> wl_comms_livekit
|
||||||
|
host_live_bstein_dev["live.bstein.dev"]
|
||||||
|
svc_comms_othrys_element_element_web["comms/othrys-element-element-web (Service)"]
|
||||||
|
host_live_bstein_dev --> svc_comms_othrys_element_element_web
|
||||||
|
wl_comms_othrys_element_element_web["comms/othrys-element-element-web (Deployment)"]
|
||||||
|
svc_comms_othrys_element_element_web --> wl_comms_othrys_element_element_web
|
||||||
|
host_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
svc_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Service)"]
|
||||||
|
host_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
wl_comms_othrys_synapse_matrix_synapse["comms/othrys-synapse-matrix-synapse (Deployment)"]
|
||||||
|
svc_comms_othrys_synapse_matrix_synapse --> wl_comms_othrys_synapse_matrix_synapse
|
||||||
|
host_longhorn_bstein_dev["longhorn.bstein.dev"]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Service)"]
|
||||||
|
host_longhorn_bstein_dev --> svc_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
wl_longhorn_system_oauth2_proxy_longhorn["longhorn-system/oauth2-proxy-longhorn (Deployment)"]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn --> wl_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
host_mail_bstein_dev["mail.bstein.dev"]
|
||||||
|
svc_mailu_mailserver_mailu_front["mailu-mailserver/mailu-front (Service)"]
|
||||||
|
host_mail_bstein_dev --> svc_mailu_mailserver_mailu_front
|
||||||
|
host_matrix_live_bstein_dev["matrix.live.bstein.dev"]
|
||||||
|
svc_comms_matrix_authentication_service["comms/matrix-authentication-service (Service)"]
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_authentication_service
|
||||||
|
wl_comms_matrix_authentication_service["comms/matrix-authentication-service (Deployment)"]
|
||||||
|
svc_comms_matrix_authentication_service --> wl_comms_matrix_authentication_service
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_wellknown
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
svc_comms_matrix_guest_register["comms/matrix-guest-register (Service)"]
|
||||||
|
host_matrix_live_bstein_dev --> svc_comms_matrix_guest_register
|
||||||
|
wl_comms_matrix_guest_register["comms/matrix-guest-register (Deployment)"]
|
||||||
|
svc_comms_matrix_guest_register --> wl_comms_matrix_guest_register
|
||||||
|
host_monero_bstein_dev["monero.bstein.dev"]
|
||||||
|
svc_crypto_monerod["crypto/monerod (Service)"]
|
||||||
|
host_monero_bstein_dev --> svc_crypto_monerod
|
||||||
|
wl_crypto_monerod["crypto/monerod (Deployment)"]
|
||||||
|
svc_crypto_monerod --> wl_crypto_monerod
|
||||||
|
host_office_bstein_dev["office.bstein.dev"]
|
||||||
|
svc_nextcloud_collabora["nextcloud/collabora (Service)"]
|
||||||
|
host_office_bstein_dev --> svc_nextcloud_collabora
|
||||||
|
wl_nextcloud_collabora["nextcloud/collabora (Deployment)"]
|
||||||
|
svc_nextcloud_collabora --> wl_nextcloud_collabora
|
||||||
|
host_pegasus_bstein_dev["pegasus.bstein.dev"]
|
||||||
|
svc_jellyfin_pegasus["jellyfin/pegasus (Service)"]
|
||||||
|
host_pegasus_bstein_dev --> svc_jellyfin_pegasus
|
||||||
|
wl_jellyfin_pegasus["jellyfin/pegasus (Deployment)"]
|
||||||
|
svc_jellyfin_pegasus --> wl_jellyfin_pegasus
|
||||||
|
host_scm_bstein_dev["scm.bstein.dev"]
|
||||||
|
svc_gitea_gitea["gitea/gitea (Service)"]
|
||||||
|
host_scm_bstein_dev --> svc_gitea_gitea
|
||||||
|
wl_gitea_gitea["gitea/gitea (Deployment)"]
|
||||||
|
svc_gitea_gitea --> wl_gitea_gitea
|
||||||
|
host_secret_bstein_dev["secret.bstein.dev"]
|
||||||
|
svc_vault_vault["vault/vault (Service)"]
|
||||||
|
host_secret_bstein_dev --> svc_vault_vault
|
||||||
|
wl_vault_vault["vault/vault (StatefulSet)"]
|
||||||
|
svc_vault_vault --> wl_vault_vault
|
||||||
|
host_sso_bstein_dev["sso.bstein.dev"]
|
||||||
|
svc_sso_keycloak["sso/keycloak (Service)"]
|
||||||
|
host_sso_bstein_dev --> svc_sso_keycloak
|
||||||
|
wl_sso_keycloak["sso/keycloak (Deployment)"]
|
||||||
|
svc_sso_keycloak --> wl_sso_keycloak
|
||||||
|
host_stream_bstein_dev["stream.bstein.dev"]
|
||||||
|
svc_jellyfin_jellyfin["jellyfin/jellyfin (Service)"]
|
||||||
|
host_stream_bstein_dev --> svc_jellyfin_jellyfin
|
||||||
|
wl_jellyfin_jellyfin["jellyfin/jellyfin (Deployment)"]
|
||||||
|
svc_jellyfin_jellyfin --> wl_jellyfin_jellyfin
|
||||||
|
host_vault_bstein_dev["vault.bstein.dev"]
|
||||||
|
svc_vaultwarden_vaultwarden_service["vaultwarden/vaultwarden-service (Service)"]
|
||||||
|
host_vault_bstein_dev --> svc_vaultwarden_vaultwarden_service
|
||||||
|
wl_vaultwarden_vaultwarden["vaultwarden/vaultwarden (Deployment)"]
|
||||||
|
svc_vaultwarden_vaultwarden_service --> wl_vaultwarden_vaultwarden
|
||||||
|
|
||||||
|
subgraph bstein_dev_home[bstein-dev-home]
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_frontend
|
||||||
|
svc_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
wl_bstein_dev_home_bstein_dev_home_backend
|
||||||
|
svc_bstein_dev_home_chat_ai_gateway
|
||||||
|
wl_bstein_dev_home_chat_ai_gateway
|
||||||
|
end
|
||||||
|
subgraph comms[comms]
|
||||||
|
svc_comms_matrix_wellknown
|
||||||
|
wl_comms_matrix_wellknown
|
||||||
|
svc_comms_element_call
|
||||||
|
wl_comms_element_call
|
||||||
|
svc_comms_livekit_token_service
|
||||||
|
wl_comms_livekit_token_service
|
||||||
|
svc_comms_livekit
|
||||||
|
wl_comms_livekit
|
||||||
|
svc_comms_othrys_element_element_web
|
||||||
|
wl_comms_othrys_element_element_web
|
||||||
|
svc_comms_othrys_synapse_matrix_synapse
|
||||||
|
wl_comms_othrys_synapse_matrix_synapse
|
||||||
|
svc_comms_matrix_authentication_service
|
||||||
|
wl_comms_matrix_authentication_service
|
||||||
|
svc_comms_matrix_guest_register
|
||||||
|
wl_comms_matrix_guest_register
|
||||||
|
end
|
||||||
|
subgraph crypto[crypto]
|
||||||
|
svc_crypto_monerod
|
||||||
|
wl_crypto_monerod
|
||||||
|
end
|
||||||
|
subgraph gitea[gitea]
|
||||||
|
svc_gitea_gitea
|
||||||
|
wl_gitea_gitea
|
||||||
|
end
|
||||||
|
subgraph jellyfin[jellyfin]
|
||||||
|
svc_jellyfin_pegasus
|
||||||
|
wl_jellyfin_pegasus
|
||||||
|
svc_jellyfin_jellyfin
|
||||||
|
wl_jellyfin_jellyfin
|
||||||
|
end
|
||||||
|
subgraph jenkins[jenkins]
|
||||||
|
svc_jenkins_jenkins
|
||||||
|
wl_jenkins_jenkins
|
||||||
|
end
|
||||||
|
subgraph longhorn_system[longhorn-system]
|
||||||
|
svc_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
wl_longhorn_system_oauth2_proxy_longhorn
|
||||||
|
end
|
||||||
|
subgraph mailu_mailserver[mailu-mailserver]
|
||||||
|
svc_mailu_mailserver_mailu_front
|
||||||
|
end
|
||||||
|
subgraph nextcloud[nextcloud]
|
||||||
|
svc_nextcloud_nextcloud
|
||||||
|
wl_nextcloud_nextcloud
|
||||||
|
svc_nextcloud_collabora
|
||||||
|
wl_nextcloud_collabora
|
||||||
|
end
|
||||||
|
subgraph sso[sso]
|
||||||
|
svc_sso_oauth2_proxy
|
||||||
|
wl_sso_oauth2_proxy
|
||||||
|
svc_sso_keycloak
|
||||||
|
wl_sso_keycloak
|
||||||
|
end
|
||||||
|
subgraph vault[vault]
|
||||||
|
svc_vault_vault
|
||||||
|
wl_vault_vault
|
||||||
|
end
|
||||||
|
subgraph vaultwarden[vaultwarden]
|
||||||
|
svc_vaultwarden_vaultwarden_service
|
||||||
|
wl_vaultwarden_vaultwarden
|
||||||
|
end
|
||||||
26
knowledge/metis.md
Normal file
26
knowledge/metis.md
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Metis (node recovery)
|
||||||
|
|
||||||
|
## Node classes (current map)
|
||||||
|
- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11,20,21 (Ubuntu 24.04.3, k3s agent)
|
||||||
|
- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint)
|
||||||
|
- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks)
|
||||||
|
- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent)
|
||||||
|
- amd64 agents: titan-22/24 (Debian 13, k3s agent)
|
||||||
|
- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, future titan-20/21 (when added), plus any newcomers.
|
||||||
|
|
||||||
|
## Longhorn disk UUIDs (critical nodes)
|
||||||
|
- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4)
|
||||||
|
- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4)
|
||||||
|
- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4)
|
||||||
|
- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4)
|
||||||
|
|
||||||
|
## Metis repo (~/Development/metis)
|
||||||
|
- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`).
|
||||||
|
- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints).
|
||||||
|
- `AGENTS.md` in repo is untracked and holds raw notes.
|
||||||
|
|
||||||
|
## Next implementation steps
|
||||||
|
- Add per-class golden image refs and checksums (Harbor or file://) when ready.
|
||||||
|
- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths.
|
||||||
|
- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection.
|
||||||
|
- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited.
|
||||||
27
knowledge/runbooks/ci-gitea-jenkins.md
Normal file
27
knowledge/runbooks/ci-gitea-jenkins.md
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
---
|
||||||
|
title: "CI: Gitea → Jenkins pipeline"
|
||||||
|
tags: ["atlas", "ci", "gitea", "jenkins"]
|
||||||
|
owners: ["brad"]
|
||||||
|
entrypoints: ["scm.bstein.dev", "ci.bstein.dev"]
|
||||||
|
source_paths: ["services/gitea", "services/jenkins", "scripts/jenkins_cred_sync.sh", "scripts/gitea_cred_sync.sh"]
|
||||||
|
---
|
||||||
|
|
||||||
|
# CI: Gitea → Jenkins pipeline
|
||||||
|
|
||||||
|
## What this is
|
||||||
|
Atlas uses Gitea for source control and Jenkins for CI. Authentication is via Keycloak (SSO).
|
||||||
|
|
||||||
|
## Where it is configured
|
||||||
|
- Gitea manifests: `services/gitea/`
|
||||||
|
- Jenkins manifests: `services/jenkins/`
|
||||||
|
- Credential sync helpers: `scripts/gitea_cred_sync.sh`, `scripts/jenkins_cred_sync.sh`
|
||||||
|
|
||||||
|
## What users do (typical flow)
|
||||||
|
- Create a repo in Gitea.
|
||||||
|
- Create/update a Jenkins job/pipeline that can fetch the repo.
|
||||||
|
- Configure a webhook (or SCM polling) so pushes trigger builds.
|
||||||
|
|
||||||
|
## Troubleshooting (common)
|
||||||
|
- “Webhook not firing”: confirm ingress host, webhook URL, and Jenkins job is reachable.
|
||||||
|
- “Auth denied cloning”: confirm Keycloak group membership and that Jenkins has a valid token/credential configured.
|
||||||
|
|
||||||
30
knowledge/runbooks/comms-verify.md
Normal file
30
knowledge/runbooks/comms-verify.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
---
|
||||||
|
title: Othrys verification checklist
|
||||||
|
tags:
|
||||||
|
- comms
|
||||||
|
- matrix
|
||||||
|
- element
|
||||||
|
- livekit
|
||||||
|
entrypoints:
|
||||||
|
- https://live.bstein.dev
|
||||||
|
- https://matrix.live.bstein.dev
|
||||||
|
---
|
||||||
|
|
||||||
|
1) Guest join:
|
||||||
|
- Open a private window and visit:
|
||||||
|
`https://live.bstein.dev/#/room/#othrys:live.bstein.dev?action=join`
|
||||||
|
- Confirm the guest join flow works and the displayname becomes `<word>-<word>`.
|
||||||
|
|
||||||
|
2) Keycloak login:
|
||||||
|
- Log in from `https://live.bstein.dev` and confirm MAS -> Keycloak -> Element redirect.
|
||||||
|
|
||||||
|
3) Video rooms:
|
||||||
|
- Start an Element Call room and confirm audio/video with a second account.
|
||||||
|
- Check that guests can read public rooms but cannot start calls.
|
||||||
|
|
||||||
|
4) Well-known:
|
||||||
|
- `https://live.bstein.dev/.well-known/matrix/client` returns JSON.
|
||||||
|
- `https://matrix.live.bstein.dev/.well-known/matrix/client` returns JSON.
|
||||||
|
|
||||||
|
5) TURN reachability:
|
||||||
|
- Confirm `turn.live.bstein.dev:3478` and `turns:5349` are reachable from WAN.
|
||||||
34
knowledge/runbooks/kb-authoring.md
Normal file
34
knowledge/runbooks/kb-authoring.md
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
---
|
||||||
|
title: "KB authoring: what to write (and what not to)"
|
||||||
|
tags: ["atlas", "kb", "runbooks"]
|
||||||
|
owners: ["brad"]
|
||||||
|
entrypoints: []
|
||||||
|
source_paths: ["knowledge/runbooks", "scripts/knowledge_render_atlas.py"]
|
||||||
|
---
|
||||||
|
|
||||||
|
# KB authoring: what to write (and what not to)
|
||||||
|
|
||||||
|
## The goal
|
||||||
|
Give Atlas assistants enough grounded, Atlas-specific context to answer “how do I…?” questions without guessing.
|
||||||
|
|
||||||
|
## What to capture (high value)
|
||||||
|
- User workflows: “click here, set X, expected result”
|
||||||
|
- Operator workflows: “edit these files, reconcile this kustomization, verify with these commands”
|
||||||
|
- Wiring: “this host routes to this service; this service depends on Postgres/Vault/etc”
|
||||||
|
- Failure modes: exact error messages + the 2–5 checks that usually resolve them
|
||||||
|
- Permissions: Keycloak groups/roles and what they unlock
|
||||||
|
|
||||||
|
## What to avoid (low value / fluff)
|
||||||
|
- Generic Kubernetes explanations (link to upstream docs instead)
|
||||||
|
- Copy-pasting large manifests (prefer file paths + small snippets)
|
||||||
|
- Anything that will drift quickly (render it from GitOps instead)
|
||||||
|
- Any secret values (reference Secret/Vault locations by name only)
|
||||||
|
|
||||||
|
## Document pattern (recommended)
|
||||||
|
Each runbook should answer:
|
||||||
|
- “What is this?”
|
||||||
|
- “What do users do?”
|
||||||
|
- “What do operators change (where in Git)?”
|
||||||
|
- “How do we verify it works?”
|
||||||
|
- “What breaks and how to debug it?”
|
||||||
|
|
||||||
26
knowledge/runbooks/observability.md
Normal file
26
knowledge/runbooks/observability.md
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
---
|
||||||
|
title: "Observability: Grafana + VictoriaMetrics (how to query safely)"
|
||||||
|
tags: ["atlas", "monitoring", "grafana", "victoriametrics"]
|
||||||
|
owners: ["brad"]
|
||||||
|
entrypoints: ["metrics.bstein.dev", "alerts.bstein.dev"]
|
||||||
|
source_paths: ["services/monitoring"]
|
||||||
|
---
|
||||||
|
|
||||||
|
# Observability: Grafana + VictoriaMetrics (how to query safely)
|
||||||
|
|
||||||
|
## Where it is configured
|
||||||
|
- `services/monitoring/helmrelease.yaml` (Grafana + Alertmanager + VM values)
|
||||||
|
- `services/monitoring/grafana-dashboard-*.yaml` (dashboards and their PromQL)
|
||||||
|
|
||||||
|
## Using metrics as a “tool” for Atlas assistants
|
||||||
|
The safest pattern is: map a small set of intents → fixed PromQL queries, then summarize results.
|
||||||
|
|
||||||
|
Examples (intents)
|
||||||
|
- “Is the cluster healthy?” → node readiness + pod restart rate
|
||||||
|
- “Why is Element Call failing?” → LiveKit/coturn pod restarts + synapse errors + ingress 5xx
|
||||||
|
- “Is Jenkins slow?” → pod CPU/memory + HTTP latency metrics (if exported)
|
||||||
|
|
||||||
|
## Why dashboards are not the KB
|
||||||
|
Dashboards are great references, but the assistant should query VictoriaMetrics directly for live answers and keep the
|
||||||
|
KB focused on wiring, runbooks, and stable conventions.
|
||||||
|
|
||||||
18
knowledge/runbooks/template.md
Normal file
18
knowledge/runbooks/template.md
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
---
|
||||||
|
title: "<short title>"
|
||||||
|
tags: ["atlas", "<service>", "<topic>"]
|
||||||
|
owners: ["brad"]
|
||||||
|
entrypoints: ["<hostnames if relevant>"]
|
||||||
|
source_paths: ["services/<svc>", "clusters/atlas/<...>"]
|
||||||
|
---
|
||||||
|
|
||||||
|
# <Short title>
|
||||||
|
|
||||||
|
## What this is
|
||||||
|
|
||||||
|
## For users (how to)
|
||||||
|
|
||||||
|
## For operators (where configured)
|
||||||
|
|
||||||
|
## Troubleshooting (symptoms → checks)
|
||||||
|
|
||||||
73
knowledge/software/metis.md
Normal file
73
knowledge/software/metis.md
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
# Metis (node recovery)
|
||||||
|
|
||||||
|
## Node classes (current map)
|
||||||
|
- rpi5 Ubuntu workers: titan-04,05,06,07,08,09,10,11,20,21 (Ubuntu 24.04.3, k3s agent)
|
||||||
|
- rpi5 control-plane: titan-0a/0b/0c (Ubuntu 24.04.1, k3s server, control-plane taint)
|
||||||
|
- rpi4 Armbian longhorn: titan-13/15/17/19 (Armbian 6.6.x, k3s agent, longhorn disks)
|
||||||
|
- rpi4 Armbian standard: titan-12/14/18 (Armbian 6.6.x, k3s agent)
|
||||||
|
- amd64 agents: titan-22/24 (Debian 13, k3s agent)
|
||||||
|
- External/non-cluster: tethys, titan-db, titan-jh, oceanus/titan-23, plus any newcomers.
|
||||||
|
|
||||||
|
### Jetson nodes (titan-20/21)
|
||||||
|
- Ubuntu 20.04.6 (Focal), kernel 5.10.104-tegra, CRI containerd 2.0.5-k3s2, arch arm64.
|
||||||
|
- Storage: NVMe 232G at / (ext4); onboard mmc partitions present but root on NVMe; 1.9T sda present (unused).
|
||||||
|
- k3s agent with drop-in 99-nofile.conf.
|
||||||
|
|
||||||
|
## Longhorn disk UUIDs (critical nodes)
|
||||||
|
- titan-13: /mnt/astreae UUID=6031fa8b-f28c-45c3-b7bc-6133300e07c6 (ext4); /mnt/asteria UUID=cbd4989d-62b5-4741-8b2a-28fdae259cae (ext4)
|
||||||
|
- titan-15: /mnt/astreae UUID=f3362f14-5822-449f-944b-ac570b5cd615 (ext4); /mnt/asteria UUID=9c5316e6-f847-4884-b502-11f2d0d15d6f (ext4)
|
||||||
|
- titan-17: /mnt/astreae UUID=1fecdade-08b0-49cb-9ae3-be6c188b0a96 (ext4); /mnt/asteria UUID=2fe9f613-d372-47ca-b84f-82084e4edda0 (ext4)
|
||||||
|
- titan-19: /mnt/astreae UUID=4890abb9-dda2-4f4f-9c0f-081ee82849cf (ext4); /mnt/asteria UUID=2b4ea28d-b0e6-4fa3-841b-cd7067ae9153 (ext4)
|
||||||
|
|
||||||
|
## Metis repo (~/Development/metis)
|
||||||
|
- CLI skeleton in Go (`cmd/metis`), inventory loader (`pkg/inventory`), plan builder (`pkg/plan`).
|
||||||
|
- `inventory.example.yaml` shows expected schema (classes + per-node overlay, Longhorn disks, labels, taints).
|
||||||
|
- `AGENTS.md` in repo is untracked and holds raw notes.
|
||||||
|
|
||||||
|
## Next implementation steps
|
||||||
|
- Add per-class golden image refs and checksums (Harbor or file://) when ready.
|
||||||
|
- Implement burn execution: download with checksum, write via dd/etcher-equivalent, mount boot/root to inject hostname/IP/k3s tokens/labels/taints, journald/GC drop-ins, and Longhorn fstab entries. Add Windows writer (diskpart + wmic) and Linux writer (dd + sgdisk) paths.
|
||||||
|
- Add Keycloak/SSH bootstrap: ensure ssh user, authorized keys, and k3s token/URL injection for agents; control-plane restore path with etcd snapshot selection.
|
||||||
|
- Add per-host inventory entries for tethys, titan-db, titan-jh, oceanus/titan-23, future 20/21 once audited.
|
||||||
|
|
||||||
|
## Node OS/Kernel/CRI snapshot (Jan 2026)
|
||||||
|
- titan-04: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-05: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-06: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-07: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-08: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-09: Ubuntu 24.04.3 LTS, kernel 6.8.0-1031-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-0a: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-0b: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-0c: Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-10: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-11: Ubuntu 24.04.3 LTS, kernel 6.8.0-1039-raspi, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-12: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-13: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-14: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-15: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-17: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-18: Armbian 24.11.1 noble, kernel 6.6.60-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-19: Armbian 25.2.1 noble, kernel 6.6.63-current-bcm2711, CRI containerd://1.7.23-k3s2, arch arm64
|
||||||
|
- titan-20: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-21: Ubuntu 20.04.6 LTS, kernel 5.10.104-tegra, CRI containerd://2.0.5-k3s2, arch arm64
|
||||||
|
- titan-22: Debian 13 (trixie), kernel 6.12.41+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64
|
||||||
|
- titan-24: Debian 13 (trixie), kernel 6.12.57+deb13-amd64, CRI containerd://2.0.5-k3s2, arch amd64
|
||||||
|
|
||||||
|
|
||||||
|
### External hosts
|
||||||
|
- titan-db: Ubuntu 24.10, kernel 6.11.0-1015-raspi, root on /dev/sda2 ext4 (465G), boot vfat /dev/sda1; PostgreSQL service enabled.
|
||||||
|
- titan-jh: Arch Linux ARM (rolling), kernel 6.18.4-2-rpi, NVMe root ext4 238G (/), boot vfat 512M; ~495 packages installed (pacman -Q).
|
||||||
|
- titan-23/oceanus: TODO audit (future).
|
||||||
|
|
||||||
|
|
||||||
|
### Control plane Pis (titan-0a/0b/0c)
|
||||||
|
- Ubuntu 24.04.1 LTS, kernel 6.8.0-1038-raspi, containerd 2.0.5-k3s2.
|
||||||
|
- Storage: 477G SSD root (/dev/sda2 ext4), /boot/firmware vfat (/dev/sda1). fstab uses LABEL=writable and LABEL=system-boot.
|
||||||
|
- k3s server (control-plane taint expected); etcd snapshots not yet cataloged (TODO).
|
||||||
|
|
||||||
|
|
||||||
|
## k3s versions
|
||||||
|
- rpi5 workers/control-plane: k3s v1.33.3+k3s1 (crictl v1.31.0-k3s2)
|
||||||
|
- rpi4 nodes: k3s v1.31.5+k3s1 (crictl v1.31.0-k3s2)
|
||||||
|
- Jetson titan-20/21: k3s v1.33.3+k3s1 (per node info), crictl v1.31.0-k3s2
|
||||||
5
scripts/comms_sync_kb.sh
Executable file
5
scripts/comms_sync_kb.sh
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
python scripts/knowledge_render_atlas.py --write
|
||||||
|
python scripts/knowledge_render_atlas.py --write --out services/comms/knowledge
|
||||||
@ -9,6 +9,7 @@ Usage:
|
|||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
import textwrap
|
import textwrap
|
||||||
|
import urllib.parse
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@ -45,12 +46,14 @@ PERCENT_THRESHOLDS = {
|
|||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
NAMESPACE_CPU_WINDOW = "1m"
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Cluster metadata
|
# Cluster metadata
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
|
CONTROL_PLANE_NODES = ["titan-0a", "titan-0b", "titan-0c"]
|
||||||
CONTROL_DEPENDENCIES = ["titan-db"]
|
CONTROL_DEPENDENCIES = ["titan-db", "titan-jh"]
|
||||||
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
|
CONTROL_ALL = CONTROL_PLANE_NODES + CONTROL_DEPENDENCIES
|
||||||
WORKER_NODES = [
|
WORKER_NODES = [
|
||||||
"titan-04",
|
"titan-04",
|
||||||
@ -61,11 +64,12 @@ WORKER_NODES = [
|
|||||||
"titan-09",
|
"titan-09",
|
||||||
"titan-10",
|
"titan-10",
|
||||||
"titan-11",
|
"titan-11",
|
||||||
|
"titan-20",
|
||||||
|
"titan-21",
|
||||||
"titan-12",
|
"titan-12",
|
||||||
"titan-13",
|
"titan-13",
|
||||||
"titan-14",
|
"titan-14",
|
||||||
"titan-15",
|
"titan-15",
|
||||||
"titan-16",
|
|
||||||
"titan-17",
|
"titan-17",
|
||||||
"titan-18",
|
"titan-18",
|
||||||
"titan-19",
|
"titan-19",
|
||||||
@ -80,7 +84,22 @@ CONTROL_TOTAL = len(CONTROL_PLANE_NODES)
|
|||||||
WORKER_TOTAL = len(WORKER_NODES)
|
WORKER_TOTAL = len(WORKER_NODES)
|
||||||
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
CONTROL_SUFFIX = f"/{CONTROL_TOTAL}"
|
||||||
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
WORKER_SUFFIX = f"/{WORKER_TOTAL}"
|
||||||
CP_ALLOWED_NS = "kube-system|kube-public|kube-node-lease|longhorn-system|monitoring|flux-system"
|
# Namespaces considered infrastructure (excluded from workload counts)
|
||||||
|
INFRA_NAMESPACES = [
|
||||||
|
"kube-system",
|
||||||
|
"longhorn-system",
|
||||||
|
"metallb-system",
|
||||||
|
"monitoring",
|
||||||
|
"logging",
|
||||||
|
"cert-manager",
|
||||||
|
"flux-system",
|
||||||
|
"traefik",
|
||||||
|
"maintenance",
|
||||||
|
"postgres",
|
||||||
|
]
|
||||||
|
INFRA_REGEX = f"^({'|'.join(INFRA_NAMESPACES)})$"
|
||||||
|
# Namespaces allowed on control plane without counting as workloads
|
||||||
|
CP_ALLOWED_NS = INFRA_REGEX
|
||||||
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
LONGHORN_NODE_REGEX = "titan-1[2-9]|titan-2[24]"
|
||||||
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
|
GAUGE_WIDTHS = [4, 3, 3, 4, 3, 3, 4]
|
||||||
CONTROL_WORKLOADS_EXPR = (
|
CONTROL_WORKLOADS_EXPR = (
|
||||||
@ -170,22 +189,48 @@ def node_io_expr(scope=""):
|
|||||||
return scoped_node_expr(base, scope)
|
return scoped_node_expr(base, scope)
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_selector(scope_var):
|
||||||
|
return f'namespace!="",pod!="",container!="",container!="POD",{scope_var}'
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_gpu_selector(scope_var):
|
||||||
|
return f'namespace!="",pod!="",{scope_var}'
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_cpu_raw(scope_var):
|
||||||
|
return (
|
||||||
|
"sum(rate(container_cpu_usage_seconds_total"
|
||||||
|
f"{{{namespace_selector(scope_var)}}}[{NAMESPACE_CPU_WINDOW}])) by (namespace)"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_ram_raw(scope_var):
|
||||||
|
return f"sum(container_memory_working_set_bytes{{{namespace_selector(scope_var)}}}) by (namespace)"
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_gpu_usage_instant(scope_var):
|
||||||
|
return f"sum(DCGM_FI_DEV_GPU_UTIL{{{namespace_gpu_selector(scope_var)}}}) by (namespace)"
|
||||||
|
|
||||||
|
|
||||||
def namespace_share_expr(resource_expr):
|
def namespace_share_expr(resource_expr):
|
||||||
selected = f"( {resource_expr} ) and on(namespace) ( {NAMESPACE_TOP_FILTER} )"
|
total = f"clamp_min(sum( {resource_expr} ), 1)"
|
||||||
total = f"clamp_min(sum( {selected} ), 1)"
|
return f"100 * ( {resource_expr} ) / {total}"
|
||||||
return f"100 * ( {selected} ) / {total}"
|
|
||||||
|
|
||||||
|
|
||||||
def namespace_cpu_share_expr():
|
def namespace_cpu_share_expr(scope_var):
|
||||||
return namespace_share_expr(NAMESPACE_CPU_RAW)
|
return namespace_share_expr(namespace_cpu_raw(scope_var))
|
||||||
|
|
||||||
|
|
||||||
def namespace_ram_share_expr():
|
def namespace_ram_share_expr(scope_var):
|
||||||
return namespace_share_expr(NAMESPACE_RAM_RAW)
|
return namespace_share_expr(namespace_ram_raw(scope_var))
|
||||||
|
|
||||||
|
|
||||||
def namespace_gpu_share_expr():
|
def namespace_gpu_share_expr(scope_var):
|
||||||
return namespace_share_expr(NAMESPACE_GPU_RAW)
|
usage = namespace_gpu_usage_instant(scope_var)
|
||||||
|
total = f"(sum({usage}) or on() vector(0))"
|
||||||
|
share = f"100 * ({usage}) / clamp_min({total}, 1)"
|
||||||
|
idle = 'label_replace(vector(100), "namespace", "idle", "", "") and on() (' + total + " == 0)"
|
||||||
|
return f"({share}) or ({idle})"
|
||||||
|
|
||||||
|
|
||||||
PROBLEM_PODS_EXPR = (
|
PROBLEM_PODS_EXPR = (
|
||||||
@ -270,46 +315,12 @@ STUCK_TABLE_EXPR = (
|
|||||||
")"
|
")"
|
||||||
)
|
)
|
||||||
|
|
||||||
NAMESPACE_CPU_RAW = (
|
NAMESPACE_SCOPE_WORKLOAD = f'namespace!~"{INFRA_REGEX}"'
|
||||||
'sum(rate(container_cpu_usage_seconds_total{namespace!="",pod!="",container!=""}[5m])) by (namespace)'
|
NAMESPACE_SCOPE_ALL = 'namespace=~".*"'
|
||||||
)
|
NAMESPACE_SCOPE_INFRA = f'namespace=~"{INFRA_REGEX}"'
|
||||||
NAMESPACE_RAM_RAW = (
|
NAMESPACE_SCOPE_VARS = ["namespace_scope_cpu", "namespace_scope_gpu", "namespace_scope_ram"]
|
||||||
'sum(container_memory_working_set_bytes{namespace!="",pod!="",container!=""}) by (namespace)'
|
|
||||||
)
|
|
||||||
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
GPU_NODES = ["titan-20", "titan-21", "titan-22", "titan-24"]
|
||||||
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
GPU_NODE_REGEX = "|".join(GPU_NODES)
|
||||||
NAMESPACE_GPU_ALLOC = (
|
|
||||||
'sum((kube_pod_container_resource_requests{namespace!="",resource="nvidia.com/gpu"}'
|
|
||||||
' or kube_pod_container_resource_limits{namespace!="",resource="nvidia.com/gpu"})) by (namespace)'
|
|
||||||
)
|
|
||||||
NAMESPACE_GPU_USAGE_SHARE = (
|
|
||||||
'sum by (namespace) (max_over_time(DCGM_FI_DEV_GPU_UTIL{namespace!=\"\",pod!=\"\"}[$__range]))'
|
|
||||||
)
|
|
||||||
NAMESPACE_GPU_USAGE_INSTANT = 'sum(DCGM_FI_DEV_GPU_UTIL{namespace!="",pod!=""}) by (namespace)'
|
|
||||||
NAMESPACE_GPU_RAW = (
|
|
||||||
"("
|
|
||||||
+ NAMESPACE_GPU_USAGE_SHARE
|
|
||||||
+ ") or on(namespace) ("
|
|
||||||
+ NAMESPACE_CPU_RAW
|
|
||||||
+ " * 0)"
|
|
||||||
)
|
|
||||||
NAMESPACE_GPU_WEIGHT = (
|
|
||||||
"("
|
|
||||||
+ NAMESPACE_GPU_ALLOC
|
|
||||||
+ ") or on(namespace) ("
|
|
||||||
+ NAMESPACE_CPU_RAW
|
|
||||||
+ " * 0)"
|
|
||||||
)
|
|
||||||
NAMESPACE_ACTIVITY_SCORE = (
|
|
||||||
"( "
|
|
||||||
+ NAMESPACE_CPU_RAW
|
|
||||||
+ " ) + ("
|
|
||||||
+ NAMESPACE_RAM_RAW
|
|
||||||
+ " / 1e9) + ("
|
|
||||||
+ NAMESPACE_GPU_WEIGHT
|
|
||||||
+ " * 100)"
|
|
||||||
)
|
|
||||||
NAMESPACE_TOP_FILTER = "(topk(10, " + NAMESPACE_ACTIVITY_SCORE + ") >= bool 0)"
|
|
||||||
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
TRAEFIK_ROUTER_EXPR = "sum by (router) (rate(traefik_router_requests_total[5m]))"
|
||||||
TRAEFIK_NET_INGRESS = (
|
TRAEFIK_NET_INGRESS = (
|
||||||
'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
|
'sum(rate(container_network_receive_bytes_total{namespace="traefik",pod=~"traefik-.*"}[5m]))'
|
||||||
@ -560,9 +571,9 @@ def table_panel(
|
|||||||
return panel
|
return panel
|
||||||
|
|
||||||
|
|
||||||
def pie_panel(panel_id, title, expr, grid):
|
def pie_panel(panel_id, title, expr, grid, *, links=None, description=None):
|
||||||
"""Return a pie chart panel with readable namespace labels."""
|
"""Return a pie chart panel with readable namespace labels."""
|
||||||
return {
|
panel = {
|
||||||
"id": panel_id,
|
"id": panel_id,
|
||||||
"type": "piechart",
|
"type": "piechart",
|
||||||
"title": title,
|
"title": title,
|
||||||
@ -586,6 +597,71 @@ def pie_panel(panel_id, title, expr, grid):
|
|||||||
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
if links:
|
||||||
|
panel["links"] = links
|
||||||
|
if description:
|
||||||
|
panel["description"] = description
|
||||||
|
return panel
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_scope_variable(var_name, label):
|
||||||
|
options = [
|
||||||
|
{
|
||||||
|
"text": "workload namespaces only",
|
||||||
|
"value": NAMESPACE_SCOPE_WORKLOAD,
|
||||||
|
"selected": True,
|
||||||
|
},
|
||||||
|
{"text": "all namespaces", "value": NAMESPACE_SCOPE_ALL, "selected": False},
|
||||||
|
{
|
||||||
|
"text": "infrastructure namespaces only",
|
||||||
|
"value": NAMESPACE_SCOPE_INFRA,
|
||||||
|
"selected": False,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
query = (
|
||||||
|
"workload namespaces only : "
|
||||||
|
+ NAMESPACE_SCOPE_WORKLOAD
|
||||||
|
+ ",all namespaces : "
|
||||||
|
+ NAMESPACE_SCOPE_ALL
|
||||||
|
+ ",infrastructure namespaces only : "
|
||||||
|
+ NAMESPACE_SCOPE_INFRA
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"name": var_name,
|
||||||
|
"label": label,
|
||||||
|
"type": "custom",
|
||||||
|
"query": query,
|
||||||
|
"current": {"text": options[0]["text"], "value": options[0]["value"], "selected": True},
|
||||||
|
"options": options,
|
||||||
|
"hide": 2,
|
||||||
|
"multi": False,
|
||||||
|
"includeAll": False,
|
||||||
|
"refresh": 1,
|
||||||
|
"sort": 0,
|
||||||
|
"skipUrlSync": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def namespace_scope_links(var_name):
|
||||||
|
def with_value(value):
|
||||||
|
encoded = urllib.parse.quote(value, safe="")
|
||||||
|
params = []
|
||||||
|
for other in NAMESPACE_SCOPE_VARS:
|
||||||
|
if other == var_name:
|
||||||
|
params.append(f"var-{other}={encoded}")
|
||||||
|
else:
|
||||||
|
params.append(f"var-{other}=${{{other}}}")
|
||||||
|
return "?" + "&".join(params)
|
||||||
|
|
||||||
|
return [
|
||||||
|
{"title": "Workload namespaces only", "url": with_value(NAMESPACE_SCOPE_WORKLOAD), "targetBlank": False},
|
||||||
|
{"title": "All namespaces", "url": with_value(NAMESPACE_SCOPE_ALL), "targetBlank": False},
|
||||||
|
{
|
||||||
|
"title": "Infrastructure namespaces only",
|
||||||
|
"url": with_value(NAMESPACE_SCOPE_INFRA),
|
||||||
|
"targetBlank": False,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def bargauge_panel(
|
def bargauge_panel(
|
||||||
@ -857,6 +933,115 @@ def build_overview():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
mail_bounce_rate_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 5},
|
||||||
|
{"color": "orange", "value": 8},
|
||||||
|
{"color": "red", "value": 10},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
mail_limit_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 70},
|
||||||
|
{"color": "orange", "value": 85},
|
||||||
|
{"color": "red", "value": 95},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
mail_success_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "red", "value": None},
|
||||||
|
{"color": "orange", "value": 90},
|
||||||
|
{"color": "yellow", "value": 95},
|
||||||
|
{"color": "green", "value": 98},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
30,
|
||||||
|
"Mail Sent (1d)",
|
||||||
|
'max(postmark_outbound_sent{window="1d"})',
|
||||||
|
{"h": 2, "w": 6, "x": 0, "y": 8},
|
||||||
|
unit="none",
|
||||||
|
links=link_to("atlas-mail"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
{
|
||||||
|
"id": 31,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Mail Bounces (1d)",
|
||||||
|
"datasource": PROM_DS,
|
||||||
|
"gridPos": {"h": 2, "w": 6, "x": 12, "y": 8},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Rate",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": 'max(postmark_outbound_bounced{window="1d"})',
|
||||||
|
"refId": "B",
|
||||||
|
"legendFormat": "Count",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {"mode": "thresholds"},
|
||||||
|
"custom": {"displayMode": "auto"},
|
||||||
|
"thresholds": mail_bounce_rate_thresholds,
|
||||||
|
"unit": "none",
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": {"id": "byName", "options": "Rate"},
|
||||||
|
"properties": [{"id": "unit", "value": "percent"}],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": {"id": "byName", "options": "Count"},
|
||||||
|
"properties": [{"id": "unit", "value": "none"}],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||||
|
"textMode": "name_and_value",
|
||||||
|
},
|
||||||
|
"links": link_to("atlas-mail"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
32,
|
||||||
|
"Mail Success Rate (1d)",
|
||||||
|
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
||||||
|
{"h": 2, "w": 6, "x": 6, "y": 8},
|
||||||
|
unit="percent",
|
||||||
|
thresholds=mail_success_thresholds,
|
||||||
|
decimals=1,
|
||||||
|
links=link_to("atlas-mail"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
33,
|
||||||
|
"Mail Limit Used (30d)",
|
||||||
|
"max(postmark_sending_limit_used_percent)",
|
||||||
|
{"h": 2, "w": 6, "x": 18, "y": 8},
|
||||||
|
unit="percent",
|
||||||
|
thresholds=mail_limit_thresholds,
|
||||||
|
decimals=1,
|
||||||
|
links=link_to("atlas-mail"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
storage_panels = [
|
storage_panels = [
|
||||||
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
(23, "Astreae Usage", astreae_usage_expr("/mnt/astreae"), "percent"),
|
||||||
(24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"),
|
(24, "Asteria Usage", astreae_usage_expr("/mnt/asteria"), "percent"),
|
||||||
@ -876,28 +1061,38 @@ def build_overview():
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
cpu_scope = "$namespace_scope_cpu"
|
||||||
|
gpu_scope = "$namespace_scope_gpu"
|
||||||
|
ram_scope = "$namespace_scope_ram"
|
||||||
|
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
11,
|
11,
|
||||||
"Namespace CPU Share",
|
"Namespace CPU Share",
|
||||||
namespace_cpu_share_expr(),
|
namespace_cpu_share_expr(cpu_scope),
|
||||||
{"h": 9, "w": 8, "x": 0, "y": 16},
|
{"h": 9, "w": 8, "x": 0, "y": 16},
|
||||||
|
links=namespace_scope_links("namespace_scope_cpu"),
|
||||||
|
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
12,
|
12,
|
||||||
"Namespace GPU Share",
|
"Namespace GPU Share",
|
||||||
namespace_gpu_share_expr(),
|
namespace_gpu_share_expr(gpu_scope),
|
||||||
{"h": 9, "w": 8, "x": 8, "y": 16},
|
{"h": 9, "w": 8, "x": 8, "y": 16},
|
||||||
|
links=namespace_scope_links("namespace_scope_gpu"),
|
||||||
|
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
13,
|
13,
|
||||||
"Namespace RAM Share",
|
"Namespace RAM Share",
|
||||||
namespace_ram_share_expr(),
|
namespace_ram_share_expr(ram_scope),
|
||||||
{"h": 9, "w": 8, "x": 16, "y": 16},
|
{"h": 9, "w": 8, "x": 16, "y": 16},
|
||||||
|
links=namespace_scope_links("namespace_scope_ram"),
|
||||||
|
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -1052,7 +1247,6 @@ def build_overview():
|
|||||||
links=link_to("atlas-storage"),
|
links=link_to("atlas-storage"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"uid": "atlas-overview",
|
"uid": "atlas-overview",
|
||||||
"title": "Atlas Overview",
|
"title": "Atlas Overview",
|
||||||
@ -1063,7 +1257,13 @@ def build_overview():
|
|||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"style": "dark",
|
"style": "dark",
|
||||||
"tags": ["atlas", "overview"],
|
"tags": ["atlas", "overview"],
|
||||||
"templating": {"list": []},
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
||||||
|
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
||||||
|
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
||||||
|
]
|
||||||
|
},
|
||||||
"time": {"from": "now-1h", "to": "now"},
|
"time": {"from": "now-1h", "to": "now"},
|
||||||
"refresh": "1m",
|
"refresh": "1m",
|
||||||
"links": [],
|
"links": [],
|
||||||
@ -1513,6 +1713,33 @@ def build_storage_dashboard():
|
|||||||
time_from="90d",
|
time_from="90d",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
30,
|
||||||
|
"Maintenance Sweepers Ready",
|
||||||
|
'kube_daemonset_status_number_ready{namespace="maintenance",daemonset="node-image-sweeper"} / on(namespace,daemonset) kube_daemonset_status_desired_number_scheduled{namespace="maintenance",daemonset="node-image-sweeper"} * 100',
|
||||||
|
{"h": 4, "w": 12, "x": 0, "y": 44},
|
||||||
|
unit="percent",
|
||||||
|
thresholds=PERCENT_THRESHOLDS,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
31,
|
||||||
|
"Maintenance Cron Freshness (s)",
|
||||||
|
'time() - max by (cronjob) (kube_cronjob_status_last_successful_time{namespace="maintenance",cronjob=~"image-sweeper|grafana-smtp-sync"})',
|
||||||
|
{"h": 4, "w": 12, "x": 12, "y": 44},
|
||||||
|
unit="s",
|
||||||
|
thresholds={
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 3600},
|
||||||
|
{"color": "red", "value": 10800},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"uid": "atlas-storage",
|
"uid": "atlas-storage",
|
||||||
"title": "Atlas Storage",
|
"title": "Atlas Storage",
|
||||||
@ -1702,21 +1929,231 @@ def build_network_dashboard():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_mail_dashboard():
|
||||||
|
panels = []
|
||||||
|
|
||||||
|
bounce_rate_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 5},
|
||||||
|
{"color": "orange", "value": 8},
|
||||||
|
{"color": "red", "value": 10},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
limit_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "green", "value": None},
|
||||||
|
{"color": "yellow", "value": 70},
|
||||||
|
{"color": "orange", "value": 85},
|
||||||
|
{"color": "red", "value": 95},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
success_thresholds = {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{"color": "red", "value": None},
|
||||||
|
{"color": "orange", "value": 90},
|
||||||
|
{"color": "yellow", "value": 95},
|
||||||
|
{"color": "green", "value": 98},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
1,
|
||||||
|
"Sent (1d)",
|
||||||
|
'max(postmark_outbound_sent{window="1d"})',
|
||||||
|
{"h": 4, "w": 6, "x": 0, "y": 0},
|
||||||
|
decimals=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
2,
|
||||||
|
"Sent (7d)",
|
||||||
|
'max(postmark_outbound_sent{window="7d"})',
|
||||||
|
{"h": 4, "w": 6, "x": 6, "y": 0},
|
||||||
|
decimals=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"type": "stat",
|
||||||
|
"title": "Mail Bounces (1d)",
|
||||||
|
"datasource": PROM_DS,
|
||||||
|
"gridPos": {"h": 4, "w": 6, "x": 12, "y": 0},
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": 'max(postmark_outbound_bounce_rate{window="1d"})',
|
||||||
|
"refId": "A",
|
||||||
|
"legendFormat": "Rate",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"expr": 'max(postmark_outbound_bounced{window="1d"})',
|
||||||
|
"refId": "B",
|
||||||
|
"legendFormat": "Count",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"color": {"mode": "thresholds"},
|
||||||
|
"custom": {"displayMode": "auto"},
|
||||||
|
"thresholds": bounce_rate_thresholds,
|
||||||
|
"unit": "none",
|
||||||
|
},
|
||||||
|
"overrides": [
|
||||||
|
{
|
||||||
|
"matcher": {"id": "byName", "options": "Rate"},
|
||||||
|
"properties": [{"id": "unit", "value": "percent"}],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": {"id": "byName", "options": "Count"},
|
||||||
|
"properties": [{"id": "unit", "value": "none"}],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "center",
|
||||||
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": False},
|
||||||
|
"textMode": "name_and_value",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
4,
|
||||||
|
"Success Rate (1d)",
|
||||||
|
'clamp_min(100 - max(postmark_outbound_bounce_rate{window="1d"}), 0)',
|
||||||
|
{"h": 4, "w": 6, "x": 18, "y": 0},
|
||||||
|
unit="percent",
|
||||||
|
thresholds=success_thresholds,
|
||||||
|
decimals=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
5,
|
||||||
|
"Limit Used (30d)",
|
||||||
|
"max(postmark_sending_limit_used_percent)",
|
||||||
|
{"h": 4, "w": 6, "x": 0, "y": 4},
|
||||||
|
thresholds=limit_thresholds,
|
||||||
|
unit="percent",
|
||||||
|
decimals=1,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
6,
|
||||||
|
"Send Limit (30d)",
|
||||||
|
"max(postmark_sending_limit)",
|
||||||
|
{"h": 4, "w": 6, "x": 6, "y": 4},
|
||||||
|
decimals=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
7,
|
||||||
|
"Last Success",
|
||||||
|
"max(postmark_last_success_timestamp_seconds)",
|
||||||
|
{"h": 4, "w": 6, "x": 12, "y": 4},
|
||||||
|
unit="dateTimeAsIso",
|
||||||
|
decimals=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
stat_panel(
|
||||||
|
8,
|
||||||
|
"Exporter Errors",
|
||||||
|
"sum(postmark_request_errors_total)",
|
||||||
|
{"h": 4, "w": 6, "x": 18, "y": 4},
|
||||||
|
decimals=0,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
panels.append(
|
||||||
|
timeseries_panel(
|
||||||
|
13,
|
||||||
|
"Bounce Rate (1d vs 7d)",
|
||||||
|
"max by (window) (postmark_outbound_bounce_rate)",
|
||||||
|
{"h": 8, "w": 12, "x": 0, "y": 12},
|
||||||
|
unit="percent",
|
||||||
|
legend="{{window}}",
|
||||||
|
legend_display="table",
|
||||||
|
legend_placement="right",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
timeseries_panel(
|
||||||
|
14,
|
||||||
|
"Bounced (1d vs 7d)",
|
||||||
|
"max by (window) (postmark_outbound_bounced)",
|
||||||
|
{"h": 8, "w": 12, "x": 12, "y": 12},
|
||||||
|
unit="none",
|
||||||
|
legend="{{window}}",
|
||||||
|
legend_display="table",
|
||||||
|
legend_placement="right",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
timeseries_panel(
|
||||||
|
15,
|
||||||
|
"Sent (1d vs 7d)",
|
||||||
|
"max by (window) (postmark_outbound_sent)",
|
||||||
|
{"h": 8, "w": 12, "x": 0, "y": 20},
|
||||||
|
unit="none",
|
||||||
|
legend="{{window}}",
|
||||||
|
legend_display="table",
|
||||||
|
legend_placement="right",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels.append(
|
||||||
|
timeseries_panel(
|
||||||
|
16,
|
||||||
|
"Exporter Errors",
|
||||||
|
"sum(postmark_request_errors_total)",
|
||||||
|
{"h": 8, "w": 12, "x": 12, "y": 20},
|
||||||
|
unit="none",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"uid": "atlas-mail",
|
||||||
|
"title": "Atlas Mail",
|
||||||
|
"folderUid": PRIVATE_FOLDER,
|
||||||
|
"editable": True,
|
||||||
|
"panels": panels,
|
||||||
|
"time": {"from": "now-30d", "to": "now"},
|
||||||
|
"annotations": {"list": []},
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"style": "dark",
|
||||||
|
"tags": ["atlas", "mail"],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def build_gpu_dashboard():
|
def build_gpu_dashboard():
|
||||||
panels = []
|
panels = []
|
||||||
|
gpu_scope = "$namespace_scope_gpu"
|
||||||
panels.append(
|
panels.append(
|
||||||
pie_panel(
|
pie_panel(
|
||||||
1,
|
1,
|
||||||
"Namespace GPU Share",
|
"Namespace GPU Share",
|
||||||
namespace_gpu_share_expr(),
|
namespace_gpu_share_expr(gpu_scope),
|
||||||
{"h": 8, "w": 12, "x": 0, "y": 0},
|
{"h": 8, "w": 12, "x": 0, "y": 0},
|
||||||
|
links=namespace_scope_links("namespace_scope_gpu"),
|
||||||
|
description="Values are normalized within the selected scope; use panel links to switch scope.",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
panels.append(
|
panels.append(
|
||||||
timeseries_panel(
|
timeseries_panel(
|
||||||
2,
|
2,
|
||||||
"GPU Util by Namespace",
|
"GPU Util by Namespace",
|
||||||
NAMESPACE_GPU_USAGE_INSTANT,
|
namespace_gpu_usage_instant(gpu_scope),
|
||||||
{"h": 8, "w": 12, "x": 12, "y": 0},
|
{"h": 8, "w": 12, "x": 12, "y": 0},
|
||||||
unit="percent",
|
unit="percent",
|
||||||
legend="{{namespace}}",
|
legend="{{namespace}}",
|
||||||
@ -1757,6 +2194,13 @@ def build_gpu_dashboard():
|
|||||||
"schemaVersion": 39,
|
"schemaVersion": 39,
|
||||||
"style": "dark",
|
"style": "dark",
|
||||||
"tags": ["atlas", "gpu"],
|
"tags": ["atlas", "gpu"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
namespace_scope_variable("namespace_scope_cpu", "CPU namespace filter"),
|
||||||
|
namespace_scope_variable("namespace_scope_gpu", "GPU namespace filter"),
|
||||||
|
namespace_scope_variable("namespace_scope_ram", "RAM namespace filter"),
|
||||||
|
]
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1781,6 +2225,10 @@ DASHBOARDS = {
|
|||||||
"builder": build_network_dashboard,
|
"builder": build_network_dashboard,
|
||||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml",
|
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-network.yaml",
|
||||||
},
|
},
|
||||||
|
"atlas-mail": {
|
||||||
|
"builder": build_mail_dashboard,
|
||||||
|
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-mail.yaml",
|
||||||
|
},
|
||||||
"atlas-gpu": {
|
"atlas-gpu": {
|
||||||
"builder": build_gpu_dashboard,
|
"builder": build_gpu_dashboard,
|
||||||
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
"configmap": ROOT / "services" / "monitoring" / "grafana-dashboard-gpu.yaml",
|
||||||
|
|||||||
445
scripts/dashboards_render_logs.py
Executable file
445
scripts/dashboards_render_logs.py
Executable file
@ -0,0 +1,445 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Generate OpenSearch Dashboards saved objects and render them into ConfigMaps.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
scripts/dashboards_render_logs.py --build # rebuild NDJSON + ConfigMap
|
||||||
|
scripts/dashboards_render_logs.py # re-render ConfigMap from NDJSON
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import textwrap
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
DASHBOARD_DIR = ROOT / "services" / "logging" / "dashboards"
|
||||||
|
NDJSON_PATH = DASHBOARD_DIR / "logs.ndjson"
|
||||||
|
CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-dashboards-objects.yaml"
|
||||||
|
|
||||||
|
CONFIG_TEMPLATE = textwrap.dedent(
|
||||||
|
"""# {relative_path}
|
||||||
|
# Generated by scripts/dashboards_render_logs.py --build
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: opensearch-dashboards-objects
|
||||||
|
namespace: logging
|
||||||
|
data:
|
||||||
|
objects.ndjson: |
|
||||||
|
{payload}
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
DASHBOARD_VERSION = "7.10.0"
|
||||||
|
GRID_COLUMNS = 48
|
||||||
|
H_CHART = 10
|
||||||
|
H_ERRORS = 8
|
||||||
|
H_TABLE = 16
|
||||||
|
H_SEARCH = 18
|
||||||
|
TABLE_SIZE = 15
|
||||||
|
TABLE_PER_PAGE = 15
|
||||||
|
|
||||||
|
ERROR_TERMS = ("*error*", "*exception*", "*fail*")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class AppSpec:
|
||||||
|
slug: str
|
||||||
|
title: str
|
||||||
|
query: str
|
||||||
|
index_id: str = "kube-logs"
|
||||||
|
kind: str = "kube"
|
||||||
|
|
||||||
|
|
||||||
|
def error_query(base: str | None = None) -> str:
|
||||||
|
parts = [f'(log : "{term}" or message : "{term}")' for term in ERROR_TERMS]
|
||||||
|
expr = " or ".join(parts)
|
||||||
|
if base:
|
||||||
|
return f"({base}) and ({expr})"
|
||||||
|
return f"({expr})"
|
||||||
|
|
||||||
|
|
||||||
|
def json_line(obj: dict) -> str:
|
||||||
|
return json.dumps(obj, separators=(",", ":"))
|
||||||
|
|
||||||
|
|
||||||
|
def search_source(query: str) -> dict:
|
||||||
|
return {
|
||||||
|
"query": {"language": "kuery", "query": query},
|
||||||
|
"filter": [],
|
||||||
|
"indexRefName": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def index_pattern(object_id: str, title: str, time_field: str = "@timestamp") -> dict:
|
||||||
|
return {
|
||||||
|
"type": "index-pattern",
|
||||||
|
"id": object_id,
|
||||||
|
"attributes": {"title": title, "timeFieldName": time_field},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def histogram_vis(object_id: str, title: str, query: str, index_id: str) -> dict:
|
||||||
|
vis_state = {
|
||||||
|
"title": title,
|
||||||
|
"type": "histogram",
|
||||||
|
"aggs": [
|
||||||
|
{"id": "1", "enabled": True, "type": "count", "schema": "metric"},
|
||||||
|
{
|
||||||
|
"id": "2",
|
||||||
|
"enabled": True,
|
||||||
|
"type": "date_histogram",
|
||||||
|
"schema": "segment",
|
||||||
|
"params": {"field": "@timestamp", "interval": "auto", "min_doc_count": 1},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"params": {"addTooltip": True, "addLegend": False, "scale": "linear", "interpolate": "linear"},
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"type": "visualization",
|
||||||
|
"id": object_id,
|
||||||
|
"attributes": {
|
||||||
|
"title": title,
|
||||||
|
"visState": json.dumps(vis_state, separators=(",", ":")),
|
||||||
|
"uiStateJSON": "{}",
|
||||||
|
"description": "",
|
||||||
|
"version": 1,
|
||||||
|
"kibanaSavedObjectMeta": {
|
||||||
|
"searchSourceJSON": json.dumps(search_source(query), separators=(",", ":"))
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"references": [
|
||||||
|
{
|
||||||
|
"name": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
||||||
|
"type": "index-pattern",
|
||||||
|
"id": index_id,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def table_vis(object_id: str, title: str, field: str, query: str, index_id: str) -> dict:
|
||||||
|
vis_state = {
|
||||||
|
"title": title,
|
||||||
|
"type": "table",
|
||||||
|
"aggs": [
|
||||||
|
{"id": "1", "enabled": True, "type": "count", "schema": "metric"},
|
||||||
|
{
|
||||||
|
"id": "2",
|
||||||
|
"enabled": True,
|
||||||
|
"type": "terms",
|
||||||
|
"schema": "bucket",
|
||||||
|
"params": {"field": field, "size": TABLE_SIZE, "order": "desc", "orderBy": "1"},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"params": {
|
||||||
|
"perPage": TABLE_PER_PAGE,
|
||||||
|
"showPartialRows": False,
|
||||||
|
"showMetricsAtAllLevels": False,
|
||||||
|
"sort": {"columnIndex": 1, "direction": "desc"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return {
|
||||||
|
"type": "visualization",
|
||||||
|
"id": object_id,
|
||||||
|
"attributes": {
|
||||||
|
"title": title,
|
||||||
|
"visState": json.dumps(vis_state, separators=(",", ":")),
|
||||||
|
"uiStateJSON": "{}",
|
||||||
|
"description": "",
|
||||||
|
"version": 1,
|
||||||
|
"kibanaSavedObjectMeta": {
|
||||||
|
"searchSourceJSON": json.dumps(search_source(query), separators=(",", ":"))
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"references": [
|
||||||
|
{
|
||||||
|
"name": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
||||||
|
"type": "index-pattern",
|
||||||
|
"id": index_id,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def search_object(object_id: str, title: str, columns: list[str], query: str, index_id: str) -> dict:
|
||||||
|
return {
|
||||||
|
"type": "search",
|
||||||
|
"id": object_id,
|
||||||
|
"attributes": {
|
||||||
|
"title": title,
|
||||||
|
"description": "",
|
||||||
|
"columns": columns,
|
||||||
|
"sort": [["@timestamp", "desc"]],
|
||||||
|
"kibanaSavedObjectMeta": {
|
||||||
|
"searchSourceJSON": json.dumps(search_source(query), separators=(",", ":"))
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"references": [
|
||||||
|
{
|
||||||
|
"name": "kibanaSavedObjectMeta.searchSourceJSON.index",
|
||||||
|
"type": "index-pattern",
|
||||||
|
"id": index_id,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def grid(x: int, y: int, w: int, h: int, i: int) -> dict:
|
||||||
|
return {"x": x, "y": y, "w": w, "h": h, "i": str(i)}
|
||||||
|
|
||||||
|
|
||||||
|
def panel(panel_id: str, panel_type: str, grid_data: dict, index: int) -> dict:
|
||||||
|
return {
|
||||||
|
"panelIndex": str(index),
|
||||||
|
"gridData": grid_data,
|
||||||
|
"id": panel_id,
|
||||||
|
"type": panel_type,
|
||||||
|
"version": DASHBOARD_VERSION,
|
||||||
|
"embeddableConfig": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def full_width_panels(specs: list[tuple[str, str, int]]) -> list[dict]:
|
||||||
|
panels = []
|
||||||
|
y = 0
|
||||||
|
for index, (panel_id, panel_type, height) in enumerate(specs, start=1):
|
||||||
|
panels.append(panel(panel_id, panel_type, grid(0, y, GRID_COLUMNS, height, index), index))
|
||||||
|
y += height
|
||||||
|
return panels
|
||||||
|
|
||||||
|
|
||||||
|
def dashboard_object(object_id: str, title: str, panels: list[dict]) -> dict:
|
||||||
|
return {
|
||||||
|
"type": "dashboard",
|
||||||
|
"id": object_id,
|
||||||
|
"attributes": {
|
||||||
|
"title": title,
|
||||||
|
"description": "",
|
||||||
|
"hits": 0,
|
||||||
|
"panelsJSON": json.dumps(panels, separators=(",", ":")),
|
||||||
|
"optionsJSON": json.dumps({"useMargins": True, "hidePanelTitles": False}, separators=(",", ":")),
|
||||||
|
"version": 1,
|
||||||
|
"timeRestore": False,
|
||||||
|
"kibanaSavedObjectMeta": {
|
||||||
|
"searchSourceJSON": json.dumps({"query": {"language": "kuery", "query": ""}, "filter": []})
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def app_dashboard_objects(app: AppSpec) -> list[dict]:
|
||||||
|
prefix = f"logs-{app.slug}"
|
||||||
|
objects = []
|
||||||
|
|
||||||
|
if app.kind == "journald":
|
||||||
|
columns = ["@timestamp", "_HOSTNAME", "_SYSTEMD_UNIT", "MESSAGE"]
|
||||||
|
objects.append(histogram_vis(f"{prefix}-volume", f"{app.title} logs", app.query, app.index_id))
|
||||||
|
objects.append(histogram_vis(f"{prefix}-errors", f"{app.title} errors", error_query(app.query), app.index_id))
|
||||||
|
objects.append(table_vis(f"{prefix}-top-units", "Top units", "_SYSTEMD_UNIT.keyword", app.query, app.index_id))
|
||||||
|
objects.append(search_object(f"{prefix}-recent", "Recent logs", columns, app.query, app.index_id))
|
||||||
|
objects.append(
|
||||||
|
search_object(
|
||||||
|
f"{prefix}-recent-errors",
|
||||||
|
"Recent errors",
|
||||||
|
columns,
|
||||||
|
error_query(app.query),
|
||||||
|
app.index_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels = full_width_panels(
|
||||||
|
[
|
||||||
|
(f"{prefix}-volume", "visualization", H_CHART),
|
||||||
|
(f"{prefix}-errors", "visualization", H_ERRORS),
|
||||||
|
(f"{prefix}-top-units", "visualization", H_TABLE),
|
||||||
|
(f"{prefix}-recent", "search", H_SEARCH),
|
||||||
|
(f"{prefix}-recent-errors", "search", H_SEARCH),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
objects.append(dashboard_object(prefix, f"{app.title} Logs", panels))
|
||||||
|
return objects
|
||||||
|
|
||||||
|
columns = ["@timestamp", "kubernetes.pod_name", "kubernetes.container_name", "log", "message"]
|
||||||
|
objects.append(histogram_vis(f"{prefix}-volume", f"{app.title} logs", app.query, app.index_id))
|
||||||
|
objects.append(histogram_vis(f"{prefix}-errors", f"{app.title} errors", error_query(app.query), app.index_id))
|
||||||
|
objects.append(table_vis(f"{prefix}-top-pods", "Top pods", "kubernetes.pod_name.keyword", app.query, app.index_id))
|
||||||
|
objects.append(
|
||||||
|
table_vis(f"{prefix}-top-containers", "Top containers", "kubernetes.container_name.keyword", app.query, app.index_id)
|
||||||
|
)
|
||||||
|
objects.append(search_object(f"{prefix}-recent", "Recent logs", columns, app.query, app.index_id))
|
||||||
|
objects.append(
|
||||||
|
search_object(
|
||||||
|
f"{prefix}-recent-errors",
|
||||||
|
"Recent errors",
|
||||||
|
columns,
|
||||||
|
error_query(app.query),
|
||||||
|
app.index_id,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels = full_width_panels(
|
||||||
|
[
|
||||||
|
(f"{prefix}-volume", "visualization", H_CHART),
|
||||||
|
(f"{prefix}-errors", "visualization", H_ERRORS),
|
||||||
|
(f"{prefix}-top-pods", "visualization", H_TABLE),
|
||||||
|
(f"{prefix}-top-containers", "visualization", H_TABLE),
|
||||||
|
(f"{prefix}-recent", "search", H_SEARCH),
|
||||||
|
(f"{prefix}-recent-errors", "search", H_SEARCH),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
objects.append(dashboard_object(prefix, f"{app.title} Logs", panels))
|
||||||
|
return objects
|
||||||
|
|
||||||
|
|
||||||
|
def overview_objects() -> list[dict]:
|
||||||
|
objects = []
|
||||||
|
objects.append(histogram_vis("logs-overview-volume", "Logs per minute", "*", "kube-logs"))
|
||||||
|
objects.append(histogram_vis("logs-overview-errors", "Errors per minute", error_query(), "kube-logs"))
|
||||||
|
objects.append(
|
||||||
|
table_vis(
|
||||||
|
"logs-overview-top-ns",
|
||||||
|
"Top namespaces",
|
||||||
|
"kubernetes.namespace_name.keyword",
|
||||||
|
"*",
|
||||||
|
"kube-logs",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
objects.append(
|
||||||
|
table_vis(
|
||||||
|
"logs-overview-top-error-ns",
|
||||||
|
"Top error namespaces",
|
||||||
|
"kubernetes.namespace_name.keyword",
|
||||||
|
error_query(),
|
||||||
|
"kube-logs",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
objects.append(table_vis("logs-overview-top-pods", "Top pods", "kubernetes.pod_name.keyword", "*", "kube-logs"))
|
||||||
|
objects.append(
|
||||||
|
table_vis(
|
||||||
|
"logs-overview-top-nodes",
|
||||||
|
"Top nodes",
|
||||||
|
"kubernetes.node_name.keyword",
|
||||||
|
"*",
|
||||||
|
"kube-logs",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
objects.append(
|
||||||
|
search_object(
|
||||||
|
"logs-overview-recent-errors",
|
||||||
|
"Recent errors",
|
||||||
|
["@timestamp", "kubernetes.namespace_name", "kubernetes.pod_name", "log", "message"],
|
||||||
|
error_query(),
|
||||||
|
"kube-logs",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
panels = full_width_panels(
|
||||||
|
[
|
||||||
|
("logs-overview-volume", "visualization", H_CHART),
|
||||||
|
("logs-overview-errors", "visualization", H_ERRORS),
|
||||||
|
("logs-overview-top-ns", "visualization", H_TABLE),
|
||||||
|
("logs-overview-top-error-ns", "visualization", H_TABLE),
|
||||||
|
("logs-overview-top-pods", "visualization", H_TABLE),
|
||||||
|
("logs-overview-top-nodes", "visualization", H_TABLE),
|
||||||
|
("logs-overview-recent-errors", "search", H_SEARCH),
|
||||||
|
]
|
||||||
|
)
|
||||||
|
objects.append(dashboard_object("logs-overview", "Atlas Logs Overview", panels))
|
||||||
|
return objects
|
||||||
|
|
||||||
|
|
||||||
|
def build_objects() -> list[dict]:
|
||||||
|
objects = [
|
||||||
|
index_pattern("kube-logs", "kube-*"),
|
||||||
|
index_pattern("journald-logs", "journald-*"),
|
||||||
|
]
|
||||||
|
|
||||||
|
objects.extend(overview_objects())
|
||||||
|
|
||||||
|
apps = [
|
||||||
|
AppSpec("bstein-dev-home", "bstein-dev-home", 'kubernetes.namespace_name: "bstein-dev-home"'),
|
||||||
|
AppSpec(
|
||||||
|
"pegasus",
|
||||||
|
"pegasus",
|
||||||
|
'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "pegasus"',
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"jellyfin",
|
||||||
|
"jellyfin",
|
||||||
|
'kubernetes.namespace_name: "jellyfin" and kubernetes.labels.app: "jellyfin"',
|
||||||
|
),
|
||||||
|
AppSpec("vaultwarden", "vaultwarden", 'kubernetes.namespace_name: "vaultwarden"'),
|
||||||
|
AppSpec("mailu", "mailu", 'kubernetes.namespace_name: "mailu-mailserver"'),
|
||||||
|
AppSpec("nextcloud", "nextcloud", 'kubernetes.namespace_name: "nextcloud"'),
|
||||||
|
AppSpec("gitea", "gitea", 'kubernetes.namespace_name: "gitea"'),
|
||||||
|
AppSpec("jenkins", "jenkins", 'kubernetes.namespace_name: "jenkins"'),
|
||||||
|
AppSpec("harbor", "harbor", 'kubernetes.namespace_name: "harbor"'),
|
||||||
|
AppSpec("vault", "vault", 'kubernetes.namespace_name: "vault"'),
|
||||||
|
AppSpec("keycloak", "keycloak", 'kubernetes.namespace_name: "sso"'),
|
||||||
|
AppSpec("flux-system", "flux-system", 'kubernetes.namespace_name: "flux-system"'),
|
||||||
|
AppSpec("comms", "comms", 'kubernetes.namespace_name: "comms"'),
|
||||||
|
AppSpec(
|
||||||
|
"element-web",
|
||||||
|
"element-web",
|
||||||
|
'kubernetes.namespace_name: "comms" and kubernetes.container_name: "element-web"',
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"element-call",
|
||||||
|
"element-call",
|
||||||
|
'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "element-call"',
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"matrix-synapse",
|
||||||
|
"matrix-synapse",
|
||||||
|
'kubernetes.namespace_name: "comms" and kubernetes.container_name: "synapse"',
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"livekit",
|
||||||
|
"livekit",
|
||||||
|
'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "livekit"',
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"coturn",
|
||||||
|
"coturn",
|
||||||
|
'kubernetes.namespace_name: "comms" and kubernetes.labels.app: "coturn"',
|
||||||
|
),
|
||||||
|
AppSpec("lesavka", "lesavka", '_HOSTNAME: "titan-jh"', index_id="journald-logs", kind="journald"),
|
||||||
|
]
|
||||||
|
|
||||||
|
for app in apps:
|
||||||
|
objects.extend(app_dashboard_objects(app))
|
||||||
|
|
||||||
|
return objects
|
||||||
|
|
||||||
|
|
||||||
|
def write_ndjson(objects: list[dict], path: Path) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
payload = "\n".join(json_line(obj) for obj in objects)
|
||||||
|
path.write_text(payload + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def render_configmap(ndjson_path: Path, output_path: Path) -> None:
|
||||||
|
payload_lines = ndjson_path.read_text().splitlines()
|
||||||
|
payload = "\n".join(" " + line for line in payload_lines)
|
||||||
|
relative_path = output_path.relative_to(ROOT)
|
||||||
|
output_path.write_text(CONFIG_TEMPLATE.format(relative_path=relative_path, payload=payload))
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--build", action="store_true", help="Regenerate saved object NDJSON and ConfigMap")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.build:
|
||||||
|
objects = build_objects()
|
||||||
|
write_ndjson(objects, NDJSON_PATH)
|
||||||
|
|
||||||
|
if not NDJSON_PATH.exists():
|
||||||
|
raise SystemExit(f"Missing NDJSON file: {NDJSON_PATH}. Run with --build first.")
|
||||||
|
|
||||||
|
render_configmap(NDJSON_PATH, CONFIG_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
554
scripts/knowledge_render_atlas.py
Normal file
554
scripts/knowledge_render_atlas.py
Normal file
@ -0,0 +1,554 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Render Atlas knowledge artifacts from Flux + kustomize manifests.
|
||||||
|
|
||||||
|
Outputs (committed to git for stable diffs + RAG):
|
||||||
|
- knowledge/catalog/*.yaml
|
||||||
|
- knowledge/diagrams/*.mmd
|
||||||
|
|
||||||
|
This is intentionally conservative:
|
||||||
|
- never includes Secret objects
|
||||||
|
- never includes secret values
|
||||||
|
- keeps output deterministic (sorted)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Iterable
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
|
||||||
|
CLUSTER_SCOPED_KINDS = {
|
||||||
|
"Namespace",
|
||||||
|
"Node",
|
||||||
|
"CustomResourceDefinition",
|
||||||
|
"ClusterRole",
|
||||||
|
"ClusterRoleBinding",
|
||||||
|
"StorageClass",
|
||||||
|
"PersistentVolume",
|
||||||
|
"MutatingWebhookConfiguration",
|
||||||
|
"ValidatingWebhookConfiguration",
|
||||||
|
"APIService",
|
||||||
|
}
|
||||||
|
|
||||||
|
INCLUDED_KINDS = {
|
||||||
|
"Namespace",
|
||||||
|
"Deployment",
|
||||||
|
"StatefulSet",
|
||||||
|
"DaemonSet",
|
||||||
|
"Service",
|
||||||
|
"Ingress",
|
||||||
|
"IngressRoute", # traefik
|
||||||
|
"HelmRelease", # only to harvest ingress hostnames from values
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _run(cmd: list[str], *, cwd: Path) -> str:
|
||||||
|
res = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, check=False)
|
||||||
|
if res.returncode != 0:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Command failed ({res.returncode}): {' '.join(cmd)}\n{res.stderr.strip()}"
|
||||||
|
)
|
||||||
|
return res.stdout
|
||||||
|
|
||||||
|
|
||||||
|
def kustomize_build(path: Path) -> str:
|
||||||
|
rel = path.relative_to(REPO_ROOT)
|
||||||
|
try:
|
||||||
|
return _run(["kubectl", "kustomize", str(rel)], cwd=REPO_ROOT)
|
||||||
|
except Exception as e:
|
||||||
|
msg = str(e)
|
||||||
|
if "is not in or below" in msg:
|
||||||
|
# Repo uses configMapGenerators that reference ../../scripts/*.py.
|
||||||
|
# Kustomize load restriction must be disabled for a full render.
|
||||||
|
try:
|
||||||
|
return _run(
|
||||||
|
["kubectl", "kustomize", "--load-restrictor=LoadRestrictionsNone", str(rel)],
|
||||||
|
cwd=REPO_ROOT,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return _run(["kustomize", "build", "--load-restrictor=LoadRestrictionsNone", str(rel)], cwd=REPO_ROOT)
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_docs(raw_yaml: str) -> Iterable[dict[str, Any]]:
|
||||||
|
for doc in yaml.safe_load_all(raw_yaml):
|
||||||
|
if not isinstance(doc, dict):
|
||||||
|
continue
|
||||||
|
kind = doc.get("kind")
|
||||||
|
if kind == "List" and isinstance(doc.get("items"), list):
|
||||||
|
for item in doc["items"]:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
yield item
|
||||||
|
continue
|
||||||
|
if kind:
|
||||||
|
yield doc
|
||||||
|
|
||||||
|
|
||||||
|
def _meta(doc: dict[str, Any]) -> tuple[str, str | None]:
|
||||||
|
md = doc.get("metadata") or {}
|
||||||
|
name = md.get("name") or ""
|
||||||
|
namespace = md.get("namespace")
|
||||||
|
return name, namespace
|
||||||
|
|
||||||
|
|
||||||
|
def _is_namespaced(doc: dict[str, Any]) -> bool:
|
||||||
|
kind = doc.get("kind") or ""
|
||||||
|
return kind not in CLUSTER_SCOPED_KINDS
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class FluxKustomization:
|
||||||
|
name: str
|
||||||
|
path: str
|
||||||
|
target_namespace: str | None
|
||||||
|
|
||||||
|
|
||||||
|
def find_flux_kustomizations() -> list[FluxKustomization]:
|
||||||
|
"""Find Flux Kustomization CRs under clusters/atlas/flux-system."""
|
||||||
|
root = REPO_ROOT / "clusters" / "atlas" / "flux-system"
|
||||||
|
items: list[FluxKustomization] = []
|
||||||
|
for file in sorted(root.rglob("*.yaml")):
|
||||||
|
raw = file.read_text()
|
||||||
|
for doc in _iter_docs(raw):
|
||||||
|
if doc.get("kind") != "Kustomization":
|
||||||
|
continue
|
||||||
|
api = str(doc.get("apiVersion") or "")
|
||||||
|
if not api.startswith("kustomize.toolkit.fluxcd.io/"):
|
||||||
|
continue
|
||||||
|
name, _ = _meta(doc)
|
||||||
|
spec = doc.get("spec") or {}
|
||||||
|
path = spec.get("path")
|
||||||
|
if not isinstance(path, str) or not path.strip():
|
||||||
|
continue
|
||||||
|
items.append(
|
||||||
|
FluxKustomization(
|
||||||
|
name=name,
|
||||||
|
path=path.strip().lstrip("./"),
|
||||||
|
target_namespace=spec.get("targetNamespace"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return sorted(items, key=lambda k: k.name)
|
||||||
|
|
||||||
|
|
||||||
|
def _safe_string_scan_for_hosts(value: Any) -> set[str]:
|
||||||
|
"""Best-effort host scan from HelmRelease values without chart rendering."""
|
||||||
|
hosts: set[str] = set()
|
||||||
|
if isinstance(value, str):
|
||||||
|
for m in re.finditer(r"(?i)([a-z0-9-]+(?:\.[a-z0-9-]+)+)", value):
|
||||||
|
host = m.group(1).lower()
|
||||||
|
if host.endswith("bstein.dev"):
|
||||||
|
hosts.add(host)
|
||||||
|
return hosts
|
||||||
|
if isinstance(value, list):
|
||||||
|
for item in value:
|
||||||
|
hosts |= _safe_string_scan_for_hosts(item)
|
||||||
|
return hosts
|
||||||
|
if isinstance(value, dict):
|
||||||
|
for item in value.values():
|
||||||
|
hosts |= _safe_string_scan_for_hosts(item)
|
||||||
|
return hosts
|
||||||
|
return hosts
|
||||||
|
|
||||||
|
|
||||||
|
def _service_ports(svc: dict[str, Any]) -> list[dict[str, Any]]:
|
||||||
|
spec = svc.get("spec") or {}
|
||||||
|
out: list[dict[str, Any]] = []
|
||||||
|
for p in spec.get("ports") or []:
|
||||||
|
if not isinstance(p, dict):
|
||||||
|
continue
|
||||||
|
out.append(
|
||||||
|
{
|
||||||
|
"name": p.get("name"),
|
||||||
|
"port": p.get("port"),
|
||||||
|
"targetPort": p.get("targetPort"),
|
||||||
|
"protocol": p.get("protocol", "TCP"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _workload_labels(doc: dict[str, Any]) -> dict[str, str]:
|
||||||
|
tpl = (doc.get("spec") or {}).get("template") or {}
|
||||||
|
md = tpl.get("metadata") or {}
|
||||||
|
labels = md.get("labels") or {}
|
||||||
|
return {str(k): str(v) for k, v in labels.items()} if isinstance(labels, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _service_selector(doc: dict[str, Any]) -> dict[str, str]:
|
||||||
|
spec = doc.get("spec") or {}
|
||||||
|
sel = spec.get("selector") or {}
|
||||||
|
return {str(k): str(v) for k, v in sel.items()} if isinstance(sel, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def _selector_matches(selector: dict[str, str], labels: dict[str, str]) -> bool:
|
||||||
|
if not selector:
|
||||||
|
return False
|
||||||
|
return all(labels.get(k) == v for k, v in selector.items())
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_node_id(text: str) -> str:
|
||||||
|
return re.sub(r"[^a-zA-Z0-9_]", "_", text)
|
||||||
|
|
||||||
|
|
||||||
|
def extract_catalog(
|
||||||
|
rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]],
|
||||||
|
) -> tuple[dict[str, Any], dict[str, Any], str]:
|
||||||
|
"""Build knowledge catalog + mermaid diagram from rendered docs."""
|
||||||
|
# Index workloads and services for mapping.
|
||||||
|
workloads: dict[tuple[str, str], dict[str, Any]] = {}
|
||||||
|
services: dict[tuple[str, str], dict[str, Any]] = {}
|
||||||
|
ingresses: list[dict[str, Any]] = []
|
||||||
|
ingressroutes: list[dict[str, Any]] = []
|
||||||
|
helmrelease_hosts: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
for src, docs in rendered:
|
||||||
|
for doc in docs:
|
||||||
|
kind = doc.get("kind")
|
||||||
|
if kind not in INCLUDED_KINDS:
|
||||||
|
continue
|
||||||
|
if kind == "Secret":
|
||||||
|
continue
|
||||||
|
|
||||||
|
name, namespace = _meta(doc)
|
||||||
|
if _is_namespaced(doc) and not namespace and src.target_namespace:
|
||||||
|
namespace = src.target_namespace
|
||||||
|
doc = dict(doc)
|
||||||
|
doc.setdefault("metadata", {})["namespace"] = namespace
|
||||||
|
|
||||||
|
if kind in ("Deployment", "StatefulSet", "DaemonSet"):
|
||||||
|
workloads[(namespace or "", name)] = {
|
||||||
|
"kind": kind,
|
||||||
|
"namespace": namespace or "",
|
||||||
|
"name": name,
|
||||||
|
"labels": _workload_labels(doc),
|
||||||
|
"serviceAccountName": ((doc.get("spec") or {}).get("template") or {})
|
||||||
|
.get("spec", {})
|
||||||
|
.get("serviceAccountName"),
|
||||||
|
"nodeSelector": ((doc.get("spec") or {}).get("template") or {})
|
||||||
|
.get("spec", {})
|
||||||
|
.get("nodeSelector", {}),
|
||||||
|
"images": sorted(
|
||||||
|
{
|
||||||
|
c.get("image")
|
||||||
|
for c in (
|
||||||
|
(((doc.get("spec") or {}).get("template") or {}).get("spec") or {}).get(
|
||||||
|
"containers"
|
||||||
|
)
|
||||||
|
or []
|
||||||
|
)
|
||||||
|
if isinstance(c, dict) and c.get("image")
|
||||||
|
}
|
||||||
|
),
|
||||||
|
}
|
||||||
|
elif kind == "Service":
|
||||||
|
services[(namespace or "", name)] = {
|
||||||
|
"namespace": namespace or "",
|
||||||
|
"name": name,
|
||||||
|
"type": (doc.get("spec") or {}).get("type", "ClusterIP"),
|
||||||
|
"selector": _service_selector(doc),
|
||||||
|
"ports": _service_ports(doc),
|
||||||
|
}
|
||||||
|
elif kind == "Ingress":
|
||||||
|
ingresses.append({"source": src.name, "doc": doc})
|
||||||
|
elif kind == "IngressRoute":
|
||||||
|
ingressroutes.append({"source": src.name, "doc": doc})
|
||||||
|
elif kind == "HelmRelease":
|
||||||
|
spec = doc.get("spec") or {}
|
||||||
|
vals = spec.get("values") or {}
|
||||||
|
hosts = sorted(_safe_string_scan_for_hosts(vals))
|
||||||
|
if hosts:
|
||||||
|
helmrelease_hosts[f"{src.name}:{namespace or ''}/{name}"] = hosts
|
||||||
|
|
||||||
|
# Map services to workloads.
|
||||||
|
service_to_workloads: dict[tuple[str, str], list[dict[str, str]]] = {}
|
||||||
|
for (ns, svc_name), svc in services.items():
|
||||||
|
selector = svc.get("selector") or {}
|
||||||
|
matches: list[dict[str, str]] = []
|
||||||
|
for (w_ns, w_name), w in workloads.items():
|
||||||
|
if w_ns != ns:
|
||||||
|
continue
|
||||||
|
if _selector_matches(selector, w.get("labels") or {}):
|
||||||
|
matches.append({"kind": w["kind"], "name": w_name})
|
||||||
|
service_to_workloads[(ns, svc_name)] = sorted(matches, key=lambda m: (m["kind"], m["name"]))
|
||||||
|
|
||||||
|
# Extract HTTP endpoints.
|
||||||
|
endpoints: list[dict[str, Any]] = []
|
||||||
|
|
||||||
|
def add_endpoint(
|
||||||
|
*,
|
||||||
|
host: str,
|
||||||
|
path: str,
|
||||||
|
namespace: str,
|
||||||
|
service: str,
|
||||||
|
port: Any,
|
||||||
|
source: str,
|
||||||
|
kind: str,
|
||||||
|
obj_name: str,
|
||||||
|
):
|
||||||
|
wk = service_to_workloads.get((namespace, service), [])
|
||||||
|
endpoints.append(
|
||||||
|
{
|
||||||
|
"host": host,
|
||||||
|
"path": path,
|
||||||
|
"backend": {
|
||||||
|
"namespace": namespace,
|
||||||
|
"service": service,
|
||||||
|
"port": port,
|
||||||
|
"workloads": wk,
|
||||||
|
},
|
||||||
|
"via": {"kind": kind, "name": obj_name, "source": source},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
for item in ingresses:
|
||||||
|
doc = item["doc"]
|
||||||
|
source = item["source"]
|
||||||
|
name, namespace = _meta(doc)
|
||||||
|
namespace = namespace or ""
|
||||||
|
spec = doc.get("spec") or {}
|
||||||
|
for rule in spec.get("rules") or []:
|
||||||
|
if not isinstance(rule, dict):
|
||||||
|
continue
|
||||||
|
host = (rule.get("host") or "").strip()
|
||||||
|
http = rule.get("http") or {}
|
||||||
|
for p in http.get("paths") or []:
|
||||||
|
if not isinstance(p, dict):
|
||||||
|
continue
|
||||||
|
backend = (p.get("backend") or {}).get("service") or {}
|
||||||
|
svc_name = backend.get("name")
|
||||||
|
svc_port = (backend.get("port") or {}).get("number") or (backend.get("port") or {}).get("name")
|
||||||
|
if not host or not svc_name:
|
||||||
|
continue
|
||||||
|
add_endpoint(
|
||||||
|
host=host,
|
||||||
|
path=p.get("path") or "/",
|
||||||
|
namespace=namespace,
|
||||||
|
service=svc_name,
|
||||||
|
port=svc_port,
|
||||||
|
source=source,
|
||||||
|
kind="Ingress",
|
||||||
|
obj_name=name,
|
||||||
|
)
|
||||||
|
|
||||||
|
host_re = re.compile(r"Host\(`([^`]+)`\)")
|
||||||
|
pathprefix_re = re.compile(r"PathPrefix\(`([^`]+)`\)")
|
||||||
|
for item in ingressroutes:
|
||||||
|
doc = item["doc"]
|
||||||
|
source = item["source"]
|
||||||
|
name, namespace = _meta(doc)
|
||||||
|
namespace = namespace or ""
|
||||||
|
spec = doc.get("spec") or {}
|
||||||
|
for route in spec.get("routes") or []:
|
||||||
|
if not isinstance(route, dict):
|
||||||
|
continue
|
||||||
|
match = route.get("match") or ""
|
||||||
|
hosts = host_re.findall(match)
|
||||||
|
pathprefixes = pathprefix_re.findall(match) or ["/"]
|
||||||
|
for svc in route.get("services") or []:
|
||||||
|
if not isinstance(svc, dict):
|
||||||
|
continue
|
||||||
|
svc_name = svc.get("name")
|
||||||
|
svc_port = svc.get("port")
|
||||||
|
if not svc_name:
|
||||||
|
continue
|
||||||
|
for host in hosts:
|
||||||
|
for pp in pathprefixes:
|
||||||
|
add_endpoint(
|
||||||
|
host=host,
|
||||||
|
path=pp,
|
||||||
|
namespace=namespace,
|
||||||
|
service=svc_name,
|
||||||
|
port=svc_port,
|
||||||
|
source=source,
|
||||||
|
kind="IngressRoute",
|
||||||
|
obj_name=name,
|
||||||
|
)
|
||||||
|
|
||||||
|
endpoints = sorted(
|
||||||
|
endpoints,
|
||||||
|
key=lambda e: (
|
||||||
|
e["host"],
|
||||||
|
e["path"],
|
||||||
|
e["backend"]["namespace"],
|
||||||
|
e["backend"]["service"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
catalog = {
|
||||||
|
"cluster": "atlas",
|
||||||
|
"sources": [
|
||||||
|
{"name": k.name, "path": k.path, "targetNamespace": k.target_namespace}
|
||||||
|
for k, _ in rendered
|
||||||
|
],
|
||||||
|
"workloads": sorted(
|
||||||
|
list(workloads.values()),
|
||||||
|
key=lambda w: (w["namespace"], w["kind"], w["name"]),
|
||||||
|
),
|
||||||
|
"services": sorted(
|
||||||
|
list(services.values()),
|
||||||
|
key=lambda s: (s["namespace"], s["name"]),
|
||||||
|
),
|
||||||
|
"http_endpoints": endpoints,
|
||||||
|
"helmrelease_host_hints": {k: v for k, v in sorted(helmrelease_hosts.items())},
|
||||||
|
}
|
||||||
|
|
||||||
|
# Mermaid diagram: host -> service -> workload (grouped by namespace).
|
||||||
|
ns_nodes: dict[str, list[str]] = {}
|
||||||
|
lines: list[str] = ["flowchart LR"]
|
||||||
|
edges: set[tuple[str, str]] = set()
|
||||||
|
|
||||||
|
def ensure_ns_node(ns: str, node_id: str):
|
||||||
|
ns_nodes.setdefault(ns, [])
|
||||||
|
if node_id not in ns_nodes[ns]:
|
||||||
|
ns_nodes[ns].append(node_id)
|
||||||
|
|
||||||
|
host_nodes: dict[str, str] = {}
|
||||||
|
|
||||||
|
for ep in endpoints:
|
||||||
|
host = ep["host"]
|
||||||
|
host_id = host_nodes.get(host)
|
||||||
|
if not host_id:
|
||||||
|
host_id = f"host_{_sanitize_node_id(host)}"
|
||||||
|
host_nodes[host] = host_id
|
||||||
|
lines.append(f' {host_id}["{host}"]')
|
||||||
|
|
||||||
|
ns = ep["backend"]["namespace"]
|
||||||
|
svc = ep["backend"]["service"]
|
||||||
|
svc_id = f"svc_{_sanitize_node_id(ns)}_{_sanitize_node_id(svc)}"
|
||||||
|
if svc_id not in ns_nodes.get(ns, []):
|
||||||
|
lines.append(f' {svc_id}["{ns}/{svc} (Service)"]')
|
||||||
|
ensure_ns_node(ns, svc_id)
|
||||||
|
|
||||||
|
if (host_id, svc_id) not in edges:
|
||||||
|
edges.add((host_id, svc_id))
|
||||||
|
lines.append(f" {host_id} --> {svc_id}")
|
||||||
|
|
||||||
|
for w in ep["backend"]["workloads"]:
|
||||||
|
w_id = f"wl_{_sanitize_node_id(ns)}_{_sanitize_node_id(w['name'])}"
|
||||||
|
if w_id not in ns_nodes.get(ns, []):
|
||||||
|
lines.append(f' {w_id}["{ns}/{w["name"]} ({w["kind"]})"]')
|
||||||
|
ensure_ns_node(ns, w_id)
|
||||||
|
if (svc_id, w_id) not in edges:
|
||||||
|
edges.add((svc_id, w_id))
|
||||||
|
lines.append(f" {svc_id} --> {w_id}")
|
||||||
|
|
||||||
|
# Wrap namespace subgraphs at the end for stability (sorted namespaces).
|
||||||
|
if ns_nodes:
|
||||||
|
lines.append("")
|
||||||
|
for ns in sorted(ns_nodes.keys()):
|
||||||
|
lines.append(f" subgraph { _sanitize_node_id(ns) }[{ns}]")
|
||||||
|
for node_id in ns_nodes[ns]:
|
||||||
|
lines.append(f" {node_id}")
|
||||||
|
lines.append(" end")
|
||||||
|
|
||||||
|
diagram = "\n".join(lines).rstrip() + "\n"
|
||||||
|
|
||||||
|
summary = {
|
||||||
|
"counts": {
|
||||||
|
"workloads": len(workloads),
|
||||||
|
"services": len(services),
|
||||||
|
"http_endpoints": len(endpoints),
|
||||||
|
"helmrelease_host_hints": sum(len(v) for v in helmrelease_hosts.values()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return catalog, summary, diagram
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
ap = argparse.ArgumentParser()
|
||||||
|
ap.add_argument("--out", default="knowledge", help="Output base directory (default: knowledge/)")
|
||||||
|
ap.add_argument(
|
||||||
|
"--write",
|
||||||
|
action="store_true",
|
||||||
|
help="Write generated files (otherwise just print a summary).",
|
||||||
|
)
|
||||||
|
args = ap.parse_args()
|
||||||
|
|
||||||
|
out_dir = REPO_ROOT / args.out
|
||||||
|
flux = find_flux_kustomizations()
|
||||||
|
if not flux:
|
||||||
|
print("No Flux Kustomizations found under clusters/atlas/flux-system.", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
rendered: list[tuple[FluxKustomization, list[dict[str, Any]]]] = []
|
||||||
|
for k in flux:
|
||||||
|
path = REPO_ROOT / k.path
|
||||||
|
if not path.exists():
|
||||||
|
continue
|
||||||
|
raw = kustomize_build(path)
|
||||||
|
docs = [d for d in _iter_docs(raw) if d.get("kind") != "Secret"]
|
||||||
|
rendered.append((k, docs))
|
||||||
|
|
||||||
|
rendered = sorted(rendered, key=lambda item: item[0].name)
|
||||||
|
catalog, summary, diagram = extract_catalog(rendered)
|
||||||
|
|
||||||
|
if not args.write:
|
||||||
|
print(json.dumps(summary, indent=2, sort_keys=True))
|
||||||
|
return 0
|
||||||
|
|
||||||
|
(out_dir / "catalog").mkdir(parents=True, exist_ok=True)
|
||||||
|
(out_dir / "diagrams").mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
catalog_path = out_dir / "catalog" / "atlas.yaml"
|
||||||
|
catalog_json_path = out_dir / "catalog" / "atlas.json"
|
||||||
|
summary_path = out_dir / "catalog" / "atlas-summary.json"
|
||||||
|
diagram_path = out_dir / "diagrams" / "atlas-http.mmd"
|
||||||
|
runbooks_json_path = out_dir / "catalog" / "runbooks.json"
|
||||||
|
|
||||||
|
catalog_path.write_text(
|
||||||
|
"# Generated by scripts/knowledge_render_atlas.py (do not edit by hand)\n"
|
||||||
|
+ yaml.safe_dump(catalog, sort_keys=False),
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
catalog_json_path.write_text(json.dumps(catalog, indent=2, sort_keys=False) + "\n", encoding="utf-8")
|
||||||
|
summary_path.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
||||||
|
diagram_path.write_text(diagram, encoding="utf-8")
|
||||||
|
|
||||||
|
# Render runbooks into JSON for lightweight, dependency-free consumption in-cluster.
|
||||||
|
runbooks_dir = out_dir / "runbooks"
|
||||||
|
runbooks: list[dict[str, Any]] = []
|
||||||
|
if runbooks_dir.exists():
|
||||||
|
for md_file in sorted(runbooks_dir.glob("*.md")):
|
||||||
|
raw = md_file.read_text(encoding="utf-8")
|
||||||
|
fm: dict[str, Any] = {}
|
||||||
|
body = raw
|
||||||
|
if raw.startswith("---\n"):
|
||||||
|
try:
|
||||||
|
_, rest = raw.split("---\n", 1)
|
||||||
|
fm_raw, body = rest.split("\n---\n", 1)
|
||||||
|
fm = yaml.safe_load(fm_raw) or {}
|
||||||
|
except Exception:
|
||||||
|
fm = {}
|
||||||
|
body = raw
|
||||||
|
runbooks.append(
|
||||||
|
{
|
||||||
|
"path": str(md_file.relative_to(out_dir)),
|
||||||
|
"title": fm.get("title") or md_file.stem,
|
||||||
|
"tags": fm.get("tags") or [],
|
||||||
|
"entrypoints": fm.get("entrypoints") or [],
|
||||||
|
"source_paths": fm.get("source_paths") or [],
|
||||||
|
"body": body.strip(),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
runbooks_json_path.write_text(json.dumps(runbooks, indent=2, sort_keys=False) + "\n", encoding="utf-8")
|
||||||
|
|
||||||
|
print(f"Wrote {catalog_path.relative_to(REPO_ROOT)}")
|
||||||
|
print(f"Wrote {catalog_json_path.relative_to(REPO_ROOT)}")
|
||||||
|
print(f"Wrote {summary_path.relative_to(REPO_ROOT)}")
|
||||||
|
print(f"Wrote {diagram_path.relative_to(REPO_ROOT)}")
|
||||||
|
print(f"Wrote {runbooks_json_path.relative_to(REPO_ROOT)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
313
scripts/logging_render_observability.py
Executable file
313
scripts/logging_render_observability.py
Executable file
@ -0,0 +1,313 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Generate OpenSearch Observability seed objects and render them into ConfigMaps.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
scripts/logging_render_observability.py --build # rebuild JSON + ConfigMap
|
||||||
|
scripts/logging_render_observability.py # re-render ConfigMap from JSON
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import textwrap
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
OBS_DIR = ROOT / "services" / "logging" / "observability"
|
||||||
|
APPS_PATH = OBS_DIR / "applications.json"
|
||||||
|
QUERIES_PATH = OBS_DIR / "saved_queries.json"
|
||||||
|
VIS_PATH = OBS_DIR / "saved_visualizations.json"
|
||||||
|
CONFIG_PATH = ROOT / "services" / "logging" / "opensearch-observability-objects.yaml"
|
||||||
|
|
||||||
|
CONFIG_TEMPLATE = textwrap.dedent(
|
||||||
|
"""# {relative_path}
|
||||||
|
# Generated by scripts/logging_render_observability.py --build
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: opensearch-observability-objects
|
||||||
|
namespace: logging
|
||||||
|
data:
|
||||||
|
applications.json: |
|
||||||
|
{applications}
|
||||||
|
saved_queries.json: |
|
||||||
|
{queries}
|
||||||
|
saved_visualizations.json: |
|
||||||
|
{visualizations}
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_RANGE = {"start": "now-24h", "end": "now", "text": ""}
|
||||||
|
DEFAULT_TIMESTAMP = {"name": "@timestamp", "type": "timestamp"}
|
||||||
|
DEFAULT_FIELDS = {"text": "", "tokens": []}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class AppSpec:
|
||||||
|
name: str
|
||||||
|
base_query: str
|
||||||
|
kind: str = "kube"
|
||||||
|
description: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class QuerySpec:
|
||||||
|
name: str
|
||||||
|
query: str
|
||||||
|
description: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class VisualizationSpec:
|
||||||
|
name: str
|
||||||
|
query: str
|
||||||
|
vis_type: str
|
||||||
|
description: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
def source_query(index: str, where: str | None = None) -> str:
|
||||||
|
query = f"source = {index}"
|
||||||
|
if where:
|
||||||
|
query += f" | where {where}"
|
||||||
|
return query
|
||||||
|
|
||||||
|
|
||||||
|
def error_filter(fields: list[str]) -> str:
|
||||||
|
parts = [f"match({field}, 'error|exception|fail')" for field in fields]
|
||||||
|
return " or ".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
def saved_query(spec: QuerySpec) -> dict:
|
||||||
|
return {
|
||||||
|
"name": spec.name,
|
||||||
|
"description": spec.description,
|
||||||
|
"query": spec.query,
|
||||||
|
"selected_date_range": DEFAULT_RANGE,
|
||||||
|
"selected_timestamp": DEFAULT_TIMESTAMP,
|
||||||
|
"selected_fields": DEFAULT_FIELDS,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def saved_visualization(spec: VisualizationSpec) -> dict:
|
||||||
|
return {
|
||||||
|
"name": spec.name,
|
||||||
|
"description": spec.description,
|
||||||
|
"query": spec.query,
|
||||||
|
"type": spec.vis_type,
|
||||||
|
"selected_date_range": DEFAULT_RANGE,
|
||||||
|
"selected_timestamp": DEFAULT_TIMESTAMP,
|
||||||
|
"selected_fields": DEFAULT_FIELDS,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_objects() -> tuple[list[dict], list[dict], list[dict]]:
|
||||||
|
kube_error = error_filter(["log", "message"])
|
||||||
|
journald_error = error_filter(["MESSAGE"])
|
||||||
|
|
||||||
|
apps = [
|
||||||
|
AppSpec("bstein-dev-home", source_query("kube-*", "kubernetes.namespace_name = 'bstein-dev-home'")),
|
||||||
|
AppSpec(
|
||||||
|
"pegasus",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'pegasus'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"jellyfin",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'jellyfin' and kubernetes.labels.app = 'jellyfin'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec("vaultwarden", source_query("kube-*", "kubernetes.namespace_name = 'vaultwarden'")),
|
||||||
|
AppSpec("mailu", source_query("kube-*", "kubernetes.namespace_name = 'mailu-mailserver'")),
|
||||||
|
AppSpec("nextcloud", source_query("kube-*", "kubernetes.namespace_name = 'nextcloud'")),
|
||||||
|
AppSpec("gitea", source_query("kube-*", "kubernetes.namespace_name = 'gitea'")),
|
||||||
|
AppSpec("jenkins", source_query("kube-*", "kubernetes.namespace_name = 'jenkins'")),
|
||||||
|
AppSpec("harbor", source_query("kube-*", "kubernetes.namespace_name = 'harbor'")),
|
||||||
|
AppSpec("vault", source_query("kube-*", "kubernetes.namespace_name = 'vault'")),
|
||||||
|
AppSpec("keycloak", source_query("kube-*", "kubernetes.namespace_name = 'sso'")),
|
||||||
|
AppSpec("flux-system", source_query("kube-*", "kubernetes.namespace_name = 'flux-system'")),
|
||||||
|
AppSpec("comms", source_query("kube-*", "kubernetes.namespace_name = 'comms'")),
|
||||||
|
AppSpec(
|
||||||
|
"element-web",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'element-web'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"element-call",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'element-call'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"matrix-synapse",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'comms' and kubernetes.container_name = 'synapse'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"livekit",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'livekit'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"coturn",
|
||||||
|
source_query(
|
||||||
|
"kube-*",
|
||||||
|
"kubernetes.namespace_name = 'comms' and kubernetes.labels.app = 'coturn'",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
AppSpec(
|
||||||
|
"lesavka",
|
||||||
|
source_query("journald-*", "_HOSTNAME = 'titan-jh'"),
|
||||||
|
kind="journald",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
applications = [
|
||||||
|
{
|
||||||
|
"name": app.name,
|
||||||
|
"description": app.description,
|
||||||
|
"baseQuery": app.base_query,
|
||||||
|
"servicesEntities": [],
|
||||||
|
"traceGroups": [app.name],
|
||||||
|
}
|
||||||
|
for app in apps
|
||||||
|
]
|
||||||
|
|
||||||
|
queries = [
|
||||||
|
saved_query(QuerySpec("kube logs", source_query("kube-*"))),
|
||||||
|
saved_query(QuerySpec("kube errors", f"{source_query('kube-*')} | where {kube_error}")),
|
||||||
|
saved_query(QuerySpec("journald logs", source_query("journald-*"))),
|
||||||
|
saved_query(QuerySpec("journald errors", f"{source_query('journald-*')} | where {journald_error}")),
|
||||||
|
]
|
||||||
|
|
||||||
|
for app in apps:
|
||||||
|
query_base = app.base_query
|
||||||
|
error_clause = journald_error if app.kind == "journald" else kube_error
|
||||||
|
queries.append(saved_query(QuerySpec(f"{app.name} logs", query_base)))
|
||||||
|
queries.append(saved_query(QuerySpec(f"{app.name} errors", f"{query_base} | where {error_clause}")))
|
||||||
|
|
||||||
|
visualizations = [
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Logs per hour",
|
||||||
|
"source = kube-* | stats count() as log_count by span(`@timestamp`, 1h)",
|
||||||
|
"line",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Errors per hour",
|
||||||
|
f"source = kube-* | where {kube_error} | stats count() as error_count by span(`@timestamp`, 1h)",
|
||||||
|
"line",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Top namespaces",
|
||||||
|
"source = kube-* | stats count() as log_count by kubernetes.namespace_name | sort - log_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Top error namespaces",
|
||||||
|
f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.namespace_name | sort - error_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Top pods",
|
||||||
|
"source = kube-* | stats count() as log_count by kubernetes.pod_name | sort - log_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Top error pods",
|
||||||
|
f"source = kube-* | where {kube_error} | stats count() as error_count by kubernetes.pod_name | sort - error_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Kube] Top nodes",
|
||||||
|
"source = kube-* | stats count() as log_count by kubernetes.node_name | sort - log_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Journald] Top units",
|
||||||
|
"source = journald-* | stats count() as log_count by _SYSTEMD_UNIT | sort - log_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
saved_visualization(
|
||||||
|
VisualizationSpec(
|
||||||
|
"[Journald] Top error units",
|
||||||
|
f"source = journald-* | where {journald_error} | stats count() as error_count by _SYSTEMD_UNIT | sort - error_count",
|
||||||
|
"bar",
|
||||||
|
)
|
||||||
|
),
|
||||||
|
]
|
||||||
|
|
||||||
|
return applications, queries, visualizations
|
||||||
|
|
||||||
|
|
||||||
|
def write_json(payload: list[dict], path: Path) -> None:
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
path.write_text(json.dumps(payload, indent=2) + "\n")
|
||||||
|
|
||||||
|
|
||||||
|
def render_configmap(apps_path: Path, queries_path: Path, vis_path: Path, output_path: Path) -> None:
|
||||||
|
relative_path = output_path.relative_to(ROOT)
|
||||||
|
applications = indent_payload(apps_path)
|
||||||
|
queries = indent_payload(queries_path)
|
||||||
|
visualizations = indent_payload(vis_path)
|
||||||
|
output_path.write_text(
|
||||||
|
CONFIG_TEMPLATE.format(
|
||||||
|
relative_path=relative_path,
|
||||||
|
applications=applications,
|
||||||
|
queries=queries,
|
||||||
|
visualizations=visualizations,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def indent_payload(path: Path) -> str:
|
||||||
|
lines = path.read_text().splitlines()
|
||||||
|
return "\n".join(" " + line for line in lines)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--build", action="store_true", help="Regenerate JSON payloads and ConfigMap")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.build:
|
||||||
|
applications, queries, visualizations = build_objects()
|
||||||
|
write_json(applications, APPS_PATH)
|
||||||
|
write_json(queries, QUERIES_PATH)
|
||||||
|
write_json(visualizations, VIS_PATH)
|
||||||
|
|
||||||
|
if not (APPS_PATH.exists() and QUERIES_PATH.exists() and VIS_PATH.exists()):
|
||||||
|
raise SystemExit("Missing observability JSON payloads. Run with --build first.")
|
||||||
|
|
||||||
|
render_configmap(APPS_PATH, QUERIES_PATH, VIS_PATH, CONFIG_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
149
scripts/monitoring_postmark_exporter.py
Normal file
149
scripts/monitoring_postmark_exporter.py
Normal file
@ -0,0 +1,149 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import datetime as dt
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from prometheus_client import Gauge, Info, start_http_server
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class Window:
|
||||||
|
label: str
|
||||||
|
days: int
|
||||||
|
|
||||||
|
|
||||||
|
WINDOWS = [
|
||||||
|
Window("today", 0),
|
||||||
|
Window("1d", 1),
|
||||||
|
Window("7d", 7),
|
||||||
|
Window("30d", 30),
|
||||||
|
]
|
||||||
|
|
||||||
|
API_BASE = os.environ.get("POSTMARK_API_BASE", "https://api.postmarkapp.com").rstrip("/")
|
||||||
|
POLL_INTERVAL_SECONDS = int(os.environ.get("POLL_INTERVAL_SECONDS", "60"))
|
||||||
|
LISTEN_ADDRESS = os.environ.get("LISTEN_ADDRESS", "0.0.0.0")
|
||||||
|
LISTEN_PORT = int(os.environ.get("LISTEN_PORT", "8000"))
|
||||||
|
|
||||||
|
PRIMARY_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN", "").strip()
|
||||||
|
FALLBACK_TOKEN = os.environ.get("POSTMARK_SERVER_TOKEN_FALLBACK", "").strip()
|
||||||
|
LIMIT_WINDOW = os.environ.get("POSTMARK_SENDING_LIMIT_WINDOW", "30d").strip()
|
||||||
|
LIMIT_RAW = os.environ.get("POSTMARK_SENDING_LIMIT", "").strip()
|
||||||
|
try:
|
||||||
|
SENDING_LIMIT = float(LIMIT_RAW) if LIMIT_RAW else 0.0
|
||||||
|
except ValueError:
|
||||||
|
SENDING_LIMIT = 0.0
|
||||||
|
|
||||||
|
EXPORTER_INFO = Info("postmark_exporter", "Exporter build info")
|
||||||
|
EXPORTER_INFO.info(
|
||||||
|
{
|
||||||
|
"api_base": API_BASE,
|
||||||
|
"windows": ",".join(window.label for window in WINDOWS),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
POSTMARK_API_UP = Gauge("postmark_api_up", "Whether Postmark API is reachable (1) or not (0)")
|
||||||
|
POSTMARK_LAST_SUCCESS = Gauge(
|
||||||
|
"postmark_last_success_timestamp_seconds",
|
||||||
|
"Unix timestamp of the last successful Postmark stats refresh",
|
||||||
|
)
|
||||||
|
POSTMARK_REQUEST_ERRORS = Gauge(
|
||||||
|
"postmark_request_errors_total",
|
||||||
|
"Total Postmark stats request errors since exporter start",
|
||||||
|
)
|
||||||
|
|
||||||
|
POSTMARK_OUTBOUND_SENT = Gauge(
|
||||||
|
"postmark_outbound_sent",
|
||||||
|
"Outbound emails sent within the selected window",
|
||||||
|
labelnames=("window",),
|
||||||
|
)
|
||||||
|
POSTMARK_OUTBOUND_BOUNCED = Gauge(
|
||||||
|
"postmark_outbound_bounced",
|
||||||
|
"Outbound emails bounced within the selected window",
|
||||||
|
labelnames=("window",),
|
||||||
|
)
|
||||||
|
POSTMARK_OUTBOUND_BOUNCE_RATE = Gauge(
|
||||||
|
"postmark_outbound_bounce_rate",
|
||||||
|
"Outbound bounce rate percentage within the selected window",
|
||||||
|
labelnames=("window",),
|
||||||
|
)
|
||||||
|
POSTMARK_SENDING_LIMIT_GAUGE = Gauge(
|
||||||
|
"postmark_sending_limit",
|
||||||
|
"Configured Postmark sending limit for the active account",
|
||||||
|
)
|
||||||
|
POSTMARK_SENDING_LIMIT_USED = Gauge(
|
||||||
|
"postmark_sending_limit_used",
|
||||||
|
"Messages sent within the configured send limit window",
|
||||||
|
)
|
||||||
|
POSTMARK_SENDING_LIMIT_USED_PERCENT = Gauge(
|
||||||
|
"postmark_sending_limit_used_percent",
|
||||||
|
"Percent of the configured send limit used within the limit window",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_outbound_stats(token: str, window: Window) -> dict:
|
||||||
|
today = dt.date.today()
|
||||||
|
fromdate = today - dt.timedelta(days=window.days)
|
||||||
|
params = {"fromdate": fromdate.isoformat(), "todate": today.isoformat()}
|
||||||
|
headers = {
|
||||||
|
"Accept": "application/json",
|
||||||
|
"X-Postmark-Server-Token": token,
|
||||||
|
}
|
||||||
|
response = requests.get(
|
||||||
|
f"{API_BASE}/stats/outbound",
|
||||||
|
headers=headers,
|
||||||
|
params=params,
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
|
||||||
|
|
||||||
|
def update_metrics(token: str) -> None:
|
||||||
|
sent_by_window = {}
|
||||||
|
for window in WINDOWS:
|
||||||
|
data = fetch_outbound_stats(token, window)
|
||||||
|
sent = int(data.get("Sent", 0) or 0)
|
||||||
|
bounced = int(data.get("Bounced", 0) or 0)
|
||||||
|
rate = (bounced / sent * 100.0) if sent else 0.0
|
||||||
|
sent_by_window[window.label] = sent
|
||||||
|
POSTMARK_OUTBOUND_SENT.labels(window=window.label).set(sent)
|
||||||
|
POSTMARK_OUTBOUND_BOUNCED.labels(window=window.label).set(bounced)
|
||||||
|
POSTMARK_OUTBOUND_BOUNCE_RATE.labels(window=window.label).set(rate)
|
||||||
|
|
||||||
|
POSTMARK_SENDING_LIMIT_GAUGE.set(SENDING_LIMIT)
|
||||||
|
limit_window_sent = sent_by_window.get(LIMIT_WINDOW, 0)
|
||||||
|
POSTMARK_SENDING_LIMIT_USED.set(limit_window_sent)
|
||||||
|
if SENDING_LIMIT:
|
||||||
|
POSTMARK_SENDING_LIMIT_USED_PERCENT.set(limit_window_sent / SENDING_LIMIT * 100.0)
|
||||||
|
else:
|
||||||
|
POSTMARK_SENDING_LIMIT_USED_PERCENT.set(0.0)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
if not PRIMARY_TOKEN and not FALLBACK_TOKEN:
|
||||||
|
raise SystemExit("POSTMARK_SERVER_TOKEN or POSTMARK_SERVER_TOKEN_FALLBACK is required")
|
||||||
|
|
||||||
|
start_http_server(LISTEN_PORT, addr=LISTEN_ADDRESS)
|
||||||
|
|
||||||
|
tokens = [token for token in (PRIMARY_TOKEN, FALLBACK_TOKEN) if token]
|
||||||
|
token_index = 0
|
||||||
|
|
||||||
|
while True:
|
||||||
|
token = tokens[token_index % len(tokens)]
|
||||||
|
token_index += 1
|
||||||
|
try:
|
||||||
|
update_metrics(token)
|
||||||
|
POSTMARK_API_UP.set(1)
|
||||||
|
POSTMARK_LAST_SUCCESS.set(time.time())
|
||||||
|
except Exception as exc: # noqa: BLE001
|
||||||
|
POSTMARK_API_UP.set(0)
|
||||||
|
POSTMARK_REQUEST_ERRORS.inc()
|
||||||
|
print(f"postmark_exporter: refresh failed: {exc}", flush=True)
|
||||||
|
time.sleep(POLL_INTERVAL_SECONDS)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
35
scripts/monitoring_render_postmark_exporter.py
Normal file
35
scripts/monitoring_render_postmark_exporter.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
def indent(text: str, spaces: int) -> str:
|
||||||
|
prefix = " " * spaces
|
||||||
|
return "".join(prefix + line if line.strip("\n") else line for line in text.splitlines(keepends=True))
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
root = Path(__file__).resolve().parents[1]
|
||||||
|
source = root / "scripts" / "monitoring_postmark_exporter.py"
|
||||||
|
target = root / "services" / "monitoring" / "postmark-exporter-script.yaml"
|
||||||
|
|
||||||
|
payload = source.read_text(encoding="utf-8")
|
||||||
|
if not payload.endswith("\n"):
|
||||||
|
payload += "\n"
|
||||||
|
|
||||||
|
yaml = (
|
||||||
|
f"# services/monitoring/postmark-exporter-script.yaml\n"
|
||||||
|
f"apiVersion: v1\n"
|
||||||
|
f"kind: ConfigMap\n"
|
||||||
|
f"metadata:\n"
|
||||||
|
f" name: postmark-exporter-script\n"
|
||||||
|
f"data:\n"
|
||||||
|
f" monitoring_postmark_exporter.py: |\n"
|
||||||
|
f"{indent(payload, 4)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
target.write_text(yaml, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@ -1,49 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
KC_BASE="${KC_BASE:?}"
|
|
||||||
KC_REALM="${KC_REALM:?}"
|
|
||||||
KC_ADMIN_USER="${KC_ADMIN_USER:?}"
|
|
||||||
KC_ADMIN_PASS="${KC_ADMIN_PASS:?}"
|
|
||||||
|
|
||||||
if ! command -v jq >/dev/null 2>&1; then
|
|
||||||
apt-get update && apt-get install -y jq curl >/dev/null
|
|
||||||
fi
|
|
||||||
|
|
||||||
account_exists() {
|
|
||||||
# Skip if the account email is already present in the mail app.
|
|
||||||
runuser -u www-data -- php occ mail:account:list 2>/dev/null | grep -Fq " ${1}" || \
|
|
||||||
runuser -u www-data -- php occ mail:account:list 2>/dev/null | grep -Fq "${1} "
|
|
||||||
}
|
|
||||||
|
|
||||||
token=$(
|
|
||||||
curl -s -d "grant_type=password" \
|
|
||||||
-d "client_id=admin-cli" \
|
|
||||||
-d "username=${KC_ADMIN_USER}" \
|
|
||||||
-d "password=${KC_ADMIN_PASS}" \
|
|
||||||
"${KC_BASE}/realms/master/protocol/openid-connect/token" | jq -r '.access_token'
|
|
||||||
)
|
|
||||||
|
|
||||||
if [[ -z "${token}" || "${token}" == "null" ]]; then
|
|
||||||
echo "Failed to obtain admin token"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
users=$(curl -s -H "Authorization: Bearer ${token}" \
|
|
||||||
"${KC_BASE}/admin/realms/${KC_REALM}/users?max=2000")
|
|
||||||
|
|
||||||
echo "${users}" | jq -c '.[]' | while read -r user; do
|
|
||||||
username=$(echo "${user}" | jq -r '.username')
|
|
||||||
email=$(echo "${user}" | jq -r '.email // empty')
|
|
||||||
app_pw=$(echo "${user}" | jq -r '.attributes.mailu_app_password[0] // empty')
|
|
||||||
[[ -z "${email}" || -z "${app_pw}" ]] && continue
|
|
||||||
if account_exists "${email}"; then
|
|
||||||
echo "Skipping ${email}, already exists"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
echo "Syncing ${email}"
|
|
||||||
runuser -u www-data -- php occ mail:account:create \
|
|
||||||
"${username}" "${username}" "${email}" \
|
|
||||||
mail.bstein.dev 993 ssl "${email}" "${app_pw}" \
|
|
||||||
mail.bstein.dev 587 tls "${email}" "${app_pw}" login || true
|
|
||||||
done
|
|
||||||
@ -1,65 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
NC_URL="${NC_URL:-https://cloud.bstein.dev}"
|
|
||||||
ADMIN_USER="${ADMIN_USER:?}"
|
|
||||||
ADMIN_PASS="${ADMIN_PASS:?}"
|
|
||||||
|
|
||||||
export DEBIAN_FRONTEND=noninteractive
|
|
||||||
apt-get update -qq
|
|
||||||
apt-get install -y -qq curl jq >/dev/null
|
|
||||||
|
|
||||||
run_occ() {
|
|
||||||
runuser -u www-data -- php occ "$@"
|
|
||||||
}
|
|
||||||
|
|
||||||
log() { echo "[$(date -Is)] $*"; }
|
|
||||||
|
|
||||||
log "Applying Atlas theming"
|
|
||||||
run_occ theming:config name "Atlas Cloud"
|
|
||||||
run_occ theming:config slogan "Unified access to Atlas services"
|
|
||||||
run_occ theming:config url "https://cloud.bstein.dev"
|
|
||||||
run_occ theming:config color "#0f172a"
|
|
||||||
run_occ theming:config disable-user-theming yes
|
|
||||||
|
|
||||||
log "Setting default quota to 200 GB"
|
|
||||||
run_occ config:app:set files default_quota --value "200 GB"
|
|
||||||
|
|
||||||
API_BASE="${NC_URL}/ocs/v2.php/apps/external/api/v1"
|
|
||||||
AUTH=(-u "${ADMIN_USER}:${ADMIN_PASS}" -H "OCS-APIRequest: true")
|
|
||||||
|
|
||||||
log "Removing existing external links"
|
|
||||||
existing=$(curl -sf "${AUTH[@]}" "${API_BASE}?format=json" | jq -r '.ocs.data[].id // empty')
|
|
||||||
for id in ${existing}; do
|
|
||||||
curl -sf "${AUTH[@]}" -X DELETE "${API_BASE}/sites/${id}?format=json" >/dev/null || true
|
|
||||||
done
|
|
||||||
|
|
||||||
SITES=(
|
|
||||||
"Vaultwarden|https://vault.bstein.dev"
|
|
||||||
"Jellyfin|https://stream.bstein.dev"
|
|
||||||
"Gitea|https://scm.bstein.dev"
|
|
||||||
"Jenkins|https://ci.bstein.dev"
|
|
||||||
"Harbor|https://registry.bstein.dev"
|
|
||||||
"Vault|https://secret.bstein.dev"
|
|
||||||
"Jitsi|https://meet.bstein.dev"
|
|
||||||
"Grafana|https://metrics.bstein.dev"
|
|
||||||
"Chat LLM|https://chat.ai.bstein.dev"
|
|
||||||
"Vision|https://draw.ai.bstein.dev"
|
|
||||||
"STT/TTS|https://talk.ai.bstein.dev"
|
|
||||||
)
|
|
||||||
|
|
||||||
log "Seeding external links"
|
|
||||||
for entry in "${SITES[@]}"; do
|
|
||||||
IFS="|" read -r name url <<<"${entry}"
|
|
||||||
curl -sf "${AUTH[@]}" -X POST "${API_BASE}/sites?format=json" \
|
|
||||||
-d "name=${name}" \
|
|
||||||
-d "url=${url}" \
|
|
||||||
-d "lang=" \
|
|
||||||
-d "type=link" \
|
|
||||||
-d "device=" \
|
|
||||||
-d "icon=" \
|
|
||||||
-d "groups[]=" \
|
|
||||||
-d "redirect=1" >/dev/null
|
|
||||||
done
|
|
||||||
|
|
||||||
log "Maintenance run completed"
|
|
||||||
509
scripts/test_atlas_user_cleanup.py
Executable file
509
scripts/test_atlas_user_cleanup.py
Executable file
@ -0,0 +1,509 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Clean up Atlas test users and portal requests (manual-only).
|
||||||
|
|
||||||
|
Default behavior is DRY RUN. This script is intended for operators to clean up
|
||||||
|
test accounts created via the bstein-dev-home onboarding portal.
|
||||||
|
|
||||||
|
Targets (best-effort):
|
||||||
|
- Keycloak users in realm "atlas"
|
||||||
|
- Atlas portal Postgres rows (access_requests + dependent tables)
|
||||||
|
- Vaultwarden users/invites created by the portal
|
||||||
|
|
||||||
|
Safety:
|
||||||
|
- Requires an explicit username prefix (e.g. "test-")
|
||||||
|
- Dry-run unless --apply is set
|
||||||
|
- --apply requires an explicit --confirm guard
|
||||||
|
- Validates prefixes to a conservative charset
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations

import argparse
import base64
import json
import os
import re
import subprocess
import sys
import time
import urllib.error
import urllib.parse
import urllib.request
from dataclasses import dataclass
from typing import Any, Iterable
|
||||||
|
|
||||||
|
|
||||||
|
# Conservative charset for operator-supplied prefixes: alnum start, then up to
# 63 chars of alnum plus . _ -  (keeps SQL LIKE patterns and shell args safe).
_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class KeycloakUser:
    """One Keycloak user as returned by the admin users API."""

    # Keycloak's internal UUID for the user.
    user_id: str
    username: str
    # May be "" when the account has no email set.
    email: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class PortalRequestRow:
    """One row of the portal's access_requests table."""

    # Public identifier of the onboarding request.
    request_code: str
    username: str
    # Raw status string as stored in the DB (schema not visible here).
    status: str
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class VaultwardenUser:
    """One Vaultwarden user as returned by the /admin/users API."""

    user_id: str
    email: str
    # Vaultwarden's "_status" field; -1 when missing/non-integer.
    status: int
|
|
||||||
|
|
||||||
|
def _run(cmd: list[str], *, input_bytes: bytes | None = None) -> str:
|
||||||
|
proc = subprocess.run(
|
||||||
|
cmd,
|
||||||
|
input=input_bytes,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
|
||||||
|
raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}")
|
||||||
|
return proc.stdout.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
def _kubectl_get_secret_value(namespace: str, name: str, key: str) -> str:
    """Read and base64-decode one key of a Kubernetes secret via kubectl."""
    jsonpath = f"jsonpath={{.data.{key}}}"
    encoded = _run(
        ["kubectl", "-n", namespace, "get", "secret", name, "-o", jsonpath]
    ).strip()
    if not encoded:
        raise RuntimeError(f"secret {namespace}/{name} key {key} is empty")
    return base64.b64decode(encoded).decode("utf-8").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _kubectl_first_pod(namespace: str) -> str:
    """Return the name of the first pod kubectl lists in *namespace*."""
    listing = json.loads(_run(["kubectl", "-n", namespace, "get", "pods", "-o", "json"]))
    pods = listing.get("items") or []
    if not isinstance(pods, list) or not pods:
        raise RuntimeError(f"no pods found in namespace {namespace}")
    name = pods[0].get("metadata", {}).get("name")
    if not isinstance(name, str) or not name:
        raise RuntimeError(f"unexpected pod list in namespace {namespace}")
    return name
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_prefixes(prefixes: list[str]) -> list[str]:
    """Strip, validate against the conservative charset, and return prefixes.

    Raises SystemExit on an invalid prefix or when none survive filtering.
    """
    accepted: list[str] = []
    for raw in prefixes:
        candidate = raw.strip()
        if not candidate:
            continue
        if _SAFE_PREFIX_RE.match(candidate) is None:
            raise SystemExit(
                f"invalid prefix '{candidate}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)"
            )
        accepted.append(candidate)
    if not accepted:
        raise SystemExit("at least one --prefix is required")
    return accepted
|
||||||
|
|
||||||
|
|
||||||
|
def _starts_with_any(value: str, prefixes: Iterable[str]) -> bool:
|
||||||
|
return any(value.startswith(p) for p in prefixes)
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_token(server: str, realm: str, client_id: str, client_secret: str) -> str:
    """Fetch a client-credentials access token from Keycloak's token endpoint."""
    form = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }
    request = urllib.request.Request(
        f"{server}/realms/{realm}/protocol/openid-connect/token",
        data=urllib.parse.urlencode(form).encode("utf-8"),
        method="POST",
    )
    request.add_header("Content-Type", "application/x-www-form-urlencoded")
    with urllib.request.urlopen(request, timeout=15) as resp:
        body = json.loads(resp.read().decode("utf-8"))
    token = body.get("access_token")
    if isinstance(token, str) and token:
        return token
    raise RuntimeError("failed to obtain keycloak access token")
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_list_users(server: str, realm: str, token: str, search: str) -> list[KeycloakUser]:
    """List realm users matching *search* (Keycloak fuzzy search, max 1000)."""
    query = urllib.parse.urlencode({"max": "1000", "search": search})
    request = urllib.request.Request(f"{server}/admin/realms/{realm}/users?{query}", method="GET")
    request.add_header("Authorization", f"Bearer {token}")
    with urllib.request.urlopen(request, timeout=30) as resp:
        payload = json.loads(resp.read().decode("utf-8"))
    if not isinstance(payload, list):
        raise RuntimeError("unexpected keycloak users response")
    results: list[KeycloakUser] = []
    for entry in payload:
        if not isinstance(entry, dict):
            continue
        uid = entry.get("id")
        uname = entry.get("username") or ""
        mail = entry.get("email") or ""
        # Skip malformed entries rather than crashing the whole sweep.
        if not isinstance(uid, str) or not uid or not isinstance(uname, str):
            continue
        results.append(KeycloakUser(user_id=uid, username=uname, email=str(mail)))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_delete_user(server: str, realm: str, token: str, user_id: str) -> None:
    """Delete one Keycloak user via the admin API.

    A 404 response is swallowed so repeated runs stay idempotent.
    NOTE(review): urllib.error is only reachable here because importing
    urllib.request happens to pull it in; add an explicit `import urllib.error`
    at the top of the file to make this robust.
    """
    req = urllib.request.Request(f"{server}/admin/realms/{realm}/users/{user_id}", method="DELETE")
    req.add_header("Authorization", f"Bearer {token}")
    try:
        with urllib.request.urlopen(req, timeout=30) as resp:
            # Drain the response body so the connection can be reused/closed cleanly.
            _ = resp.read()
    except urllib.error.HTTPError as exc:
        if exc.code == 404:
            return
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def _psql_json(portal_db_url: str, sql: str) -> list[dict[str, Any]]:
    """Run *sql* with psql inside the first postgres pod and parse the output.

    Despite the name, the output is psql's unaligned tab-separated format
    (-At -F "\\t"), not JSON; each non-empty output line becomes
    ``{"cols": [field, ...]}``.  Blank lines are skipped — previously they
    produced spurious ``{"cols": [""]}`` rows that every caller had to
    filter out by column count.
    """
    postgres_pod = _kubectl_first_pod("postgres")
    out = _run(
        [
            "kubectl",
            "-n",
            "postgres",
            "exec",
            "-i",
            postgres_pod,
            "--",
            "psql",
            portal_db_url,
            "-At",
            "-F",
            "\t",
            "-c",
            sql,
        ]
    )
    rows: list[dict[str, Any]] = []
    for line in out.splitlines():
        if not line:
            continue
        rows.append({"cols": line.split("\t")})
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _portal_list_requests(portal_db_url: str, prefixes: list[str]) -> list[PortalRequestRow]:
    """Fetch access_requests rows whose username starts with any prefix.

    Prefixes are interpolated into SQL directly; this is only safe because
    they were validated against _SAFE_PREFIX_RE (no quotes or % allowed).
    """
    where = " OR ".join(f"username LIKE '{p}%'" for p in prefixes)
    sql = (
        "SELECT request_code, username, status "
        "FROM access_requests "
        f"WHERE {where} "
        "ORDER BY created_at DESC;"
    )
    results: list[PortalRequestRow] = []
    for row in _psql_json(portal_db_url, sql):
        cols = row.get("cols") or []
        if len(cols) < 3:
            continue
        results.append(PortalRequestRow(request_code=cols[0], username=cols[1], status=cols[2]))
    return results
|
||||||
|
|
||||||
|
|
||||||
|
def _portal_delete_requests(portal_db_url: str, prefixes: list[str]) -> int:
    """Delete matching access_requests rows; return the number deleted.

    Prefixes must already be validated by _SAFE_PREFIX_RE (no quotes or %),
    which is what makes the direct SQL interpolation below acceptable.
    """
    clauses = " OR ".join(f"username LIKE '{p}%'" for p in prefixes)
    sql = f"DELETE FROM access_requests WHERE {clauses};"
    postgres_pod = _kubectl_first_pod("postgres")
    out = _run(
        [
            "kubectl",
            "-n",
            "postgres",
            "exec",
            "-i",
            postgres_pod,
            "--",
            "psql",
            portal_db_url,
            "-c",
            sql,
        ]
    )
    # psql reports the command tag "DELETE <n>".  The raw-string pattern must
    # use single backslashes: r"DELETE\\s+(\\d+)" matched a literal backslash
    # and therefore never matched, so the function always reported 0.
    match = re.search(r"DELETE\s+(\d+)", out)
    return int(match.group(1)) if match else 0
|
||||||
|
|
||||||
|
|
||||||
|
def _vaultwarden_admin_cookie(admin_token: str, base_url: str) -> str:
    """Log in to the Vaultwarden admin page and return its session cookie."""
    body = urllib.parse.urlencode({"token": admin_token}).encode("utf-8")
    request = urllib.request.Request(f"{base_url}/admin", data=body, method="POST")
    request.add_header("Content-Type", "application/x-www-form-urlencoded")
    try:
        with urllib.request.urlopen(request, timeout=10) as resp:
            set_cookie = resp.headers.get("Set-Cookie") or ""
    except urllib.error.HTTPError as exc:
        # Surface throttling as a RuntimeError so callers can back off.
        if exc.code == 429:
            raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc
        raise
    session_cookie = set_cookie.split(";", 1)[0].strip()
    if not session_cookie:
        raise RuntimeError("vaultwarden admin cookie missing")
    return session_cookie
|
||||||
|
|
||||||
|
|
||||||
|
def _vaultwarden_list_users(base_url: str, cookie: str) -> list[VaultwardenUser]:
    """Fetch all users from the Vaultwarden admin API."""
    request = urllib.request.Request(f"{base_url}/admin/users", method="GET")
    request.add_header("Cookie", cookie)
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as exc:
        if exc.code == 429:
            raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc
        raise
    if not isinstance(payload, list):
        raise RuntimeError("unexpected vaultwarden /admin/users response")
    parsed: list[VaultwardenUser] = []
    for entry in payload:
        if not isinstance(entry, dict):
            continue
        uid = entry.get("id")
        mail = entry.get("email")
        state = entry.get("_status")
        if not isinstance(uid, str) or not uid:
            continue
        if not isinstance(mail, str) or not mail:
            continue
        # Sentinel for a missing/non-integer status field.
        if not isinstance(state, int):
            state = -1
        parsed.append(VaultwardenUser(user_id=uid, email=mail, status=state))
    return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def _vaultwarden_delete_user(base_url: str, cookie: str, user_id: str) -> None:
    """Delete one Vaultwarden user via the admin API; 404 means already gone."""
    request = urllib.request.Request(f"{base_url}/admin/users/{user_id}", method="DELETE")
    request.add_header("Cookie", cookie)
    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            _ = resp.read()
    except urllib.error.HTTPError as exc:
        if exc.code in {404}:
            return
        if exc.code == 429:
            raise RuntimeError("vaultwarden admin rate limited (HTTP 429)") from exc
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def _port_forward(namespace: str, target: str, local_port: int, remote_port: int) -> subprocess.Popen[bytes]:
    """Start a background `kubectl port-forward`; the caller must terminate it."""
    cmd = [
        "kubectl",
        "-n",
        namespace,
        "port-forward",
        target,
        f"{local_port}:{remote_port}",
        "--address",
        "127.0.0.1",
    ]
    # Mute stdout/stderr to avoid leaking internal details in output.
    return subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Entry point: dry-run/apply cleanup across portal DB, Keycloak and Vaultwarden.

    Returns 0 on success, 1 when the Vaultwarden phase fails.  Destructive
    work only happens with --apply plus a matching --confirm guard.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--prefix",
        action="append",
        default=[],
        help="Username prefix to match (repeatable). Example: --prefix test-",
    )
    parser.add_argument(
        "--apply",
        action="store_true",
        help="Actually delete; otherwise dry-run only.",
    )
    parser.add_argument(
        "--confirm",
        default="",
        help=(
            "Required when using --apply. Must exactly equal the comma-separated "
            "sorted prefix list (e.g. 'atlas-,bob-,e2e-,test-')."
        ),
    )
    parser.add_argument("--skip-keycloak", action="store_true", help="Skip Keycloak user deletion.")
    parser.add_argument("--skip-portal-db", action="store_true", help="Skip portal DB cleanup.")
    parser.add_argument("--skip-vaultwarden", action="store_true", help="Skip Vaultwarden cleanup.")
    parser.add_argument(
        "--protect-keycloak-username",
        action="append",
        default=[],
        help="Keycloak usernames that must never be deleted (repeatable).",
    )
    parser.add_argument(
        "--protect-vaultwarden-email",
        action="append",
        default=[],
        help="Vaultwarden emails that must never be deleted (repeatable).",
    )
    args = parser.parse_args()

    # Canonicalize prefixes; the confirm guard must equal this exact string.
    prefixes = sorted(set(_validate_prefixes(args.prefix)))
    apply = bool(args.apply)
    expected_confirm = ",".join(prefixes)
    # Built-in safety allowlists plus operator-supplied additions.
    protected_keycloak = {"bstein", "robotuser", *[u.strip() for u in args.protect_keycloak_username if u.strip()]}
    protected_vaultwarden = {e.strip() for e in args.protect_vaultwarden_email if e.strip()}

    if apply and args.confirm != expected_confirm:
        raise SystemExit(
            f"refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')"
        )

    print("Atlas test-user cleanup")
    print("prefixes:", expected_confirm)
    print("mode:", "APPLY (destructive)" if apply else "DRY RUN (no changes)")
    if protected_keycloak:
        print("protected keycloak usernames:", ", ".join(sorted(protected_keycloak)))
    if protected_vaultwarden:
        print("protected vaultwarden emails:", ", ".join(sorted(protected_vaultwarden)))
    print()

    # --- Phase 1: portal database -------------------------------------------
    if not args.skip_portal_db:
        portal_db_url = _kubectl_get_secret_value("bstein-dev-home", "atlas-portal-db", "PORTAL_DATABASE_URL")
        requests = _portal_list_requests(portal_db_url, prefixes)
        print(f"Portal DB: {len(requests)} access_requests matched")
        for row in requests[:50]:
            print(f"  {row.request_code}\t{row.status}\t{row.username}")
        if len(requests) > 50:
            print(f"  ... and {len(requests) - 50} more")
        if apply and requests:
            deleted = _portal_delete_requests(portal_db_url, prefixes)
            print(f"Portal DB: deleted {deleted} access_requests (cascade removes tasks/steps/artifacts).")
        print()

    # --- Phase 2: Keycloak ---------------------------------------------------
    if not args.skip_keycloak:
        kc_server = os.getenv("KEYCLOAK_PUBLIC_URL", "https://sso.bstein.dev").rstrip("/")
        kc_realm = os.getenv("KEYCLOAK_REALM", "atlas")
        kc_client_id = os.getenv("KEYCLOAK_ADMIN_CLIENT_ID", "bstein-dev-home-admin")
        kc_client_secret = _kubectl_get_secret_value(
            "bstein-dev-home", "bstein-dev-home-keycloak-admin", "client_secret"
        )
        token = _keycloak_token(kc_server, kc_realm, kc_client_id, kc_client_secret)
        # Dedupe by user id: the fuzzy search can return one user for
        # several prefixes; the strict prefix re-check filters false hits.
        found: dict[str, KeycloakUser] = {}
        for prefix in prefixes:
            for user in _keycloak_list_users(kc_server, kc_realm, token, prefix):
                if not _starts_with_any(user.username, prefixes):
                    continue
                if user.username in protected_keycloak:
                    continue
                found[user.user_id] = user
        users = list(found.values())
        users.sort(key=lambda u: u.username)
        print(f"Keycloak: {len(users)} users matched")
        for user in users[:50]:
            email = user.email or "-"
            print(f"  {user.username}\t{email}\t{user.user_id}")
        if len(users) > 50:
            print(f"  ... and {len(users) - 50} more")
        if apply and users:
            for user in users:
                _keycloak_delete_user(kc_server, kc_realm, token, user.user_id)
            print(f"Keycloak: deleted {len(users)} users.")
        print()

    # --- Phase 3: Vaultwarden (via local port-forward) -----------------------
    if not args.skip_vaultwarden:
        pf = _port_forward("vaultwarden", "svc/vaultwarden-service", 18081, 80)
        try:
            # wait briefly for the port-forward to come up
            for _ in range(30):
                try:
                    urllib.request.urlopen("http://127.0.0.1:18081/", timeout=1).read(1)
                    break
                except Exception:
                    time.sleep(0.2)

            admin_token = _kubectl_get_secret_value("vaultwarden", "vaultwarden-admin", "ADMIN_TOKEN")
            base_url = "http://127.0.0.1:18081"
            try:
                # Exponential backoff (capped at 60s) around the admin login
                # and user listing, since Vaultwarden rate-limits /admin.
                cookie = ""
                for attempt in range(7):
                    try:
                        cookie = _vaultwarden_admin_cookie(admin_token, base_url)
                        break
                    except RuntimeError as exc:
                        if "rate limited" in str(exc).lower():
                            time.sleep(min(60.0, 2.0**attempt))
                            continue
                        raise
                if not cookie:
                    raise RuntimeError("vaultwarden admin login repeatedly rate limited")

                users: list[VaultwardenUser] = []
                for attempt in range(7):
                    try:
                        users = _vaultwarden_list_users(base_url, cookie)
                        break
                    except RuntimeError as exc:
                        if "rate limited" in str(exc).lower():
                            time.sleep(min(60.0, 2.0**attempt))
                            continue
                        raise
                # NOTE(review): an instance with genuinely zero users is
                # indistinguishable from a throttled empty response here.
                if not users:
                    raise RuntimeError("vaultwarden user list unavailable (possibly rate limited)")
            except RuntimeError as exc:
                print(f"Vaultwarden: ERROR: {exc}")
                print()
                return 1
            # Match on the local part of the email against the same prefixes.
            matched: list[VaultwardenUser] = []
            for user in users:
                local = user.email.split("@", 1)[0]
                if _starts_with_any(local, prefixes):
                    if user.email in protected_vaultwarden:
                        continue
                    matched.append(user)
            matched.sort(key=lambda u: u.email)
            print(f"Vaultwarden: {len(matched)} users matched")
            for user in matched[:50]:
                print(f"  {user.email}\tstatus={user.status}\t{user.user_id}")
            if len(matched) > 50:
                print(f"  ... and {len(matched) - 50} more")
            if apply and matched:
                for user in matched:
                    _vaultwarden_delete_user(base_url, cookie, user.user_id)
                print(f"Vaultwarden: deleted {len(matched)} users.")
            print()
        finally:
            # Always tear down the port-forward, escalating to SIGKILL.
            pf.terminate()
            try:
                pf.wait(timeout=3)
            except Exception:
                pf.kill()
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    raise SystemExit(main())
|
||||||
276
scripts/test_user_cleanup.py
Executable file
276
scripts/test_user_cleanup.py
Executable file
@ -0,0 +1,276 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Any, Iterable
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from atlas_portal import db, settings
|
||||||
|
from atlas_portal.keycloak import admin_client
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class KeycloakUser:
    """Minimal Keycloak user view used by this cleanup tool."""

    # Keycloak's internal UUID.
    id: str
    username: str
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class PortalRequest:
    """One access_requests row from the portal database."""

    request_code: str
    username: str
    # Raw status string as stored in the DB.
    status: str
|
||||||
|
|
||||||
|
def _dedupe_by_id(users: Iterable[KeycloakUser]) -> list[KeycloakUser]:
|
||||||
|
seen: set[str] = set()
|
||||||
|
out: list[KeycloakUser] = []
|
||||||
|
for user in users:
|
||||||
|
if user.id in seen:
|
||||||
|
continue
|
||||||
|
seen.add(user.id)
|
||||||
|
out.append(user)
|
||||||
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_keycloak_users_for_prefix(prefix: str, max_results: int) -> list[KeycloakUser]:
    """Query the Keycloak admin API for users whose username starts with *prefix*."""
    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured in this environment")

    url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    # Keycloak can return false positives for search; we do a strict prefix match client-side.
    params = {"search": prefix, "max": str(max_results), "briefRepresentation": "true"}
    with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
        response = http.get(url, params=params, headers=client.headers())
        response.raise_for_status()
        payload = response.json()

    if not isinstance(payload, list):
        return []

    matches: list[KeycloakUser] = []
    for entry in payload:
        if not isinstance(entry, dict):
            continue
        uname = entry.get("username")
        uid = entry.get("id")
        if not isinstance(uname, str) or not isinstance(uid, str):
            continue
        # Strict prefix filter; never touch Keycloak's internal service accounts.
        if not uname.startswith(prefix) or uname.startswith("service-account-"):
            continue
        matches.append(KeycloakUser(id=uid, username=uname))
    return matches
|
||||||
|
|
||||||
|
|
||||||
|
def _find_keycloak_users(prefixes: list[str], max_results: int, protected: set[str]) -> list[KeycloakUser]:
    """Collect prefix-matched Keycloak users, deduped, minus protected names."""
    collected: list[KeycloakUser] = []
    for prefix in prefixes:
        collected.extend(_iter_keycloak_users_for_prefix(prefix, max_results=max_results))
    return [user for user in _dedupe_by_id(collected) if user.username not in protected]
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_keycloak_users(users: list[KeycloakUser]) -> None:
    """Delete the given Keycloak users; a per-user 404 is ignored (idempotent)."""
    if not users:
        return

    client = admin_client()
    if not client.ready():
        raise RuntimeError("keycloak admin client not configured in this environment")

    base = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
    with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
        for user in users:
            response = http.delete(f"{base}/{quote(user.id, safe='')}", headers=client.headers())
            # Deleting a non-existent user is treated as success for idempotency.
            if response.status_code == 404:
                continue
            response.raise_for_status()
|
||||||
|
|
||||||
|
|
||||||
|
def _find_portal_requests(prefixes: list[str], max_results: int) -> list[PortalRequest]:
    """Read matching access_requests rows from the portal DB (empty if unconfigured)."""
    if not db.configured():
        return []

    raw_rows: list[dict[str, Any]] = []
    with db.connect() as conn:
        # One parameterized query per prefix; LIMIT applies per prefix.
        for prefix in prefixes:
            cursor = conn.execute(
                """
                SELECT request_code, username, status
                FROM access_requests
                WHERE username LIKE %s
                ORDER BY created_at DESC
                LIMIT %s
                """,
                (f"{prefix}%", max_results),
            )
            batch = cursor.fetchall()
            if isinstance(batch, list):
                raw_rows.extend(r for r in batch if isinstance(r, dict))

    requests: list[PortalRequest] = []
    for row in raw_rows:
        code = row.get("request_code")
        user = row.get("username")
        status = row.get("status")
        if isinstance(code, str) and isinstance(user, str) and isinstance(status, str):
            requests.append(PortalRequest(request_code=code, username=user, status=status))
    return requests
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_portal_requests(prefixes: list[str]) -> int:
    """Delete matching access_requests rows; return the total removed."""
    if not db.configured():
        return 0

    total = 0
    with db.connect() as conn:
        for prefix in prefixes:
            cursor = conn.execute("DELETE FROM access_requests WHERE username LIKE %s", (f"{prefix}%",))
            # rowcount may be None on some drivers; treat that as zero.
            total += cursor.rowcount or 0
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def _summarize_portal_requests(rows: list[PortalRequest]) -> dict[str, int]:
|
||||||
|
counts: dict[str, int] = defaultdict(int)
|
||||||
|
for row in rows:
|
||||||
|
counts[row.status] += 1
|
||||||
|
return dict(counts)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_args(argv: list[str]) -> argparse.Namespace:
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog="test_user_cleanup",
|
||||||
|
description=(
|
||||||
|
"Manual-only cleanup for test users/requests. "
|
||||||
|
"This script is intended to be run inside the bstein-dev-home backend container."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--prefix",
|
||||||
|
action="append",
|
||||||
|
required=True,
|
||||||
|
help="Username prefix to target (repeatable). Example: --prefix test-",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--max",
|
||||||
|
type=int,
|
||||||
|
default=500,
|
||||||
|
help="Maximum users/requests to enumerate per prefix (default: 500).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--apply",
|
||||||
|
action="store_true",
|
||||||
|
help="Apply deletions (default is dry-run). Requires --confirm.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--confirm",
|
||||||
|
default="",
|
||||||
|
help="Required when using --apply. Must exactly equal the comma-separated prefix list.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--skip-keycloak",
|
||||||
|
action="store_true",
|
||||||
|
help="Skip deleting Keycloak users.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--skip-portal",
|
||||||
|
action="store_true",
|
||||||
|
help="Skip deleting portal (DB) access requests.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--protect",
|
||||||
|
action="append",
|
||||||
|
default=[],
|
||||||
|
help="Extra usernames to never delete (repeatable).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--verbose",
|
||||||
|
action="store_true",
|
||||||
|
help="List matched usernames/request codes.",
|
||||||
|
)
|
||||||
|
return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str]) -> int:
    """Dry-run/apply cleanup of Keycloak test users and portal access requests.

    Returns 0 on success, 2 on argument/guard errors.
    """
    args = _parse_args(argv)
    # Canonical sorted prefix set; the --confirm guard must equal this string.
    prefixes = sorted({p.strip() for p in args.prefix if p.strip()})
    if not prefixes:
        print("error: no valid --prefix values provided", file=sys.stderr)
        return 2

    expected_confirm = ",".join(prefixes)
    # Built-in allowlist plus operator-supplied --protect names.
    protected = {"bstein", "robotuser", *[p.strip() for p in args.protect if p.strip()]}

    if args.apply and args.confirm != expected_confirm:
        print(
            f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')",
            file=sys.stderr,
        )
        return 2

    keycloak_users: list[KeycloakUser] = []
    portal_requests: list[PortalRequest] = []

    # Enumerate first so the dry-run report and apply path share one view.
    if not args.skip_keycloak:
        keycloak_users = _find_keycloak_users(prefixes, max_results=args.max, protected=protected)

    if not args.skip_portal:
        portal_requests = _find_portal_requests(prefixes, max_results=args.max)

    print(f"prefixes: {expected_confirm}")
    print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}")
    if protected:
        print(f"protected usernames: {', '.join(sorted(protected))}")

    if not args.skip_keycloak:
        print(f"keycloak users matched: {len(keycloak_users)}")
        if args.verbose and keycloak_users:
            for user in sorted(keycloak_users, key=lambda u: u.username):
                print(f"  - {user.username}")

    if not args.skip_portal:
        print(f"portal requests matched: {len(portal_requests)}")
        if portal_requests:
            summary = _summarize_portal_requests(portal_requests)
            summary_str = ", ".join(f"{k}={v}" for k, v in sorted(summary.items()))
            print(f"  statuses: {summary_str}")
        if args.verbose and portal_requests:
            # Cap verbose listing at 50 request codes.
            for req in portal_requests[: min(50, len(portal_requests))]:
                print(f"  - {req.request_code} ({req.status})")
            if len(portal_requests) > 50:
                print(f"  ... and {len(portal_requests) - 50} more")

    if not args.apply:
        print("dry-run complete (no changes made)")
        return 0

    # Destructive phase: portal rows first, then Keycloak accounts.
    if not args.skip_portal:
        deleted = _delete_portal_requests(prefixes)
        print(f"deleted portal requests: {deleted}")

    if not args.skip_keycloak:
        _delete_keycloak_users(keycloak_users)
        print(f"deleted keycloak users: {len(keycloak_users)}")

    print("done")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Forward CLI args (minus the program name) and exit with main()'s code.
    raise SystemExit(main(sys.argv[1:]))
|
||||||
|
|
||||||
18
scripts/test_user_cleanup.sh
Executable file
18
scripts/test_user_cleanup.sh
Executable file
@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env bash
set -euo pipefail

# Manual-only helper to run `scripts/test_user_cleanup.py` inside the portal backend container.
#
# Usage (dry-run):
#   scripts/test_user_cleanup.sh --prefix test-
#
# Usage (apply):
#   scripts/test_user_cleanup.sh --prefix test- --apply --confirm test-

NS="${PORTAL_NAMESPACE:-bstein-dev-home}"
TARGET="${PORTAL_BACKEND_EXEC_TARGET:-deploy/bstein-dev-home-backend}"

SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

# Stream the script over stdin so the container needs no copy of the repo.
# (Input redirection replaces the former useless `cat file | kubectl ...`.)
kubectl -n "${NS}" exec -i "${TARGET}" -- python - "$@" <"${SCRIPT_DIR}/test_user_cleanup.py"
|
||||||
|
|
||||||
318
scripts/test_vaultwarden_user_cleanup.py
Executable file
318
scripts/test_vaultwarden_user_cleanup.py
Executable file
@ -0,0 +1,318 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Clean up Vaultwarden test users and invites (manual-only).
|
||||||
|
|
||||||
|
This script deletes Vaultwarden rows directly from the Postgres database. It is
|
||||||
|
intended only for removing test fallout (e.g. e2e-*, test-*) and is deliberately
|
||||||
|
conservative:
|
||||||
|
|
||||||
|
- Requires one or more explicit email prefixes (repeatable).
|
||||||
|
- Dry-run by default; --apply requires an exact --confirm guard.
|
||||||
|
- Refuses to delete any user with dependent data in Vaultwarden tables.
|
||||||
|
- Supports a protected email allowlist to prevent catastrophic mistakes.
|
||||||
|
|
||||||
|
Example (dry-run):
|
||||||
|
scripts/test_vaultwarden_user_cleanup.py --prefix e2e-
|
||||||
|
|
||||||
|
Example (apply):
|
||||||
|
scripts/test_vaultwarden_user_cleanup.py --prefix e2e- --apply --confirm e2e-
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Iterable, Sequence
|
||||||
|
|
||||||
|
|
||||||
|
_SAFE_PREFIX_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$")
|
||||||
|
_UUID_RE = re.compile(r"^[0-9a-fA-F-]{32,36}$")
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
|
||||||
|
class VaultwardenUser:
|
||||||
|
uuid: str
|
||||||
|
email: str
|
||||||
|
dependent_rows: int
|
||||||
|
|
||||||
|
|
||||||
|
def _run(cmd: Sequence[str], *, input_bytes: bytes | None = None) -> str:
|
||||||
|
proc = subprocess.run(
|
||||||
|
list(cmd),
|
||||||
|
input=input_bytes,
|
||||||
|
stdout=subprocess.PIPE,
|
||||||
|
stderr=subprocess.PIPE,
|
||||||
|
check=False,
|
||||||
|
)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
stderr = proc.stderr.decode("utf-8", errors="replace").strip()
|
||||||
|
raise RuntimeError(f"command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}")
|
||||||
|
return proc.stdout.decode("utf-8", errors="replace")
|
||||||
|
|
||||||
|
|
||||||
|
def _kubectl_first_pod(namespace: str) -> str:
|
||||||
|
raw = _run(["kubectl", "-n", namespace, "get", "pods", "-o", "json"])
|
||||||
|
data = json.loads(raw)
|
||||||
|
items = data.get("items") or []
|
||||||
|
if not isinstance(items, list) or not items:
|
||||||
|
raise RuntimeError(f"no pods found in namespace {namespace}")
|
||||||
|
name = items[0].get("metadata", {}).get("name")
|
||||||
|
if not isinstance(name, str) or not name:
|
||||||
|
raise RuntimeError(f"unexpected pod list in namespace {namespace}")
|
||||||
|
return name
|
||||||
|
|
||||||
|
|
||||||
|
def _psql(sql: str) -> str:
|
||||||
|
pod = _kubectl_first_pod("postgres")
|
||||||
|
return _run(
|
||||||
|
[
|
||||||
|
"kubectl",
|
||||||
|
"-n",
|
||||||
|
"postgres",
|
||||||
|
"exec",
|
||||||
|
"-i",
|
||||||
|
pod,
|
||||||
|
"--",
|
||||||
|
"psql",
|
||||||
|
"-U",
|
||||||
|
"postgres",
|
||||||
|
"-d",
|
||||||
|
"vaultwarden",
|
||||||
|
"-At",
|
||||||
|
"-F",
|
||||||
|
"\t",
|
||||||
|
"-c",
|
||||||
|
sql,
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_prefixes(prefixes: Iterable[str]) -> list[str]:
|
||||||
|
cleaned: list[str] = []
|
||||||
|
for prefix in prefixes:
|
||||||
|
prefix = prefix.strip()
|
||||||
|
if not prefix:
|
||||||
|
continue
|
||||||
|
if not _SAFE_PREFIX_RE.match(prefix):
|
||||||
|
raise SystemExit(
|
||||||
|
f"invalid prefix '{prefix}': must match {_SAFE_PREFIX_RE.pattern} (alnum plus ._-)"
|
||||||
|
)
|
||||||
|
if not prefix.endswith("-"):
|
||||||
|
raise SystemExit(f"refusing prefix '{prefix}': must end with '-' for safety")
|
||||||
|
cleaned.append(prefix)
|
||||||
|
if not cleaned:
|
||||||
|
raise SystemExit("at least one --prefix is required")
|
||||||
|
return sorted(set(cleaned))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_rows(tsv: str) -> list[list[str]]:
|
||||||
|
rows: list[list[str]] = []
|
||||||
|
for line in tsv.splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
rows.append(line.split("\t"))
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
def _sql_or_email_prefixes(prefixes: list[str]) -> str:
|
||||||
|
# prefixes validated to safe charset; safe to interpolate.
|
||||||
|
clauses = [f"email LIKE '{p}%'" for p in prefixes]
|
||||||
|
return " OR ".join(clauses) if clauses else "FALSE"
|
||||||
|
|
||||||
|
|
||||||
|
def _sql_quote(value: str) -> str:
|
||||||
|
return "'" + value.replace("'", "''") + "'"
|
||||||
|
|
||||||
|
|
||||||
|
def _sql_text_array(values: Iterable[str]) -> str:
|
||||||
|
items = ",".join(_sql_quote(v) for v in values)
|
||||||
|
return f"ARRAY[{items}]::text[]"
|
||||||
|
|
||||||
|
|
||||||
|
def _list_users(prefixes: list[str], protected: set[str]) -> list[VaultwardenUser]:
|
||||||
|
clause = _sql_or_email_prefixes(prefixes)
|
||||||
|
sql = f"""
|
||||||
|
WITH candidates AS (
|
||||||
|
SELECT uuid, email
|
||||||
|
FROM users
|
||||||
|
WHERE enabled
|
||||||
|
AND ({clause})
|
||||||
|
AND email <> ALL({_sql_text_array(sorted(protected))})
|
||||||
|
)
|
||||||
|
SELECT
|
||||||
|
candidates.uuid,
|
||||||
|
candidates.email,
|
||||||
|
(
|
||||||
|
(SELECT COUNT(*) FROM auth_requests WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM ciphers WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM devices WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM emergency_access WHERE grantor_uuid = candidates.uuid OR grantee_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM favorites WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM folders WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM sends WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM twofactor WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM twofactor_incomplete WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM users_collections WHERE user_uuid = candidates.uuid) +
|
||||||
|
(SELECT COUNT(*) FROM users_organizations WHERE user_uuid = candidates.uuid)
|
||||||
|
) AS dependent_rows
|
||||||
|
FROM candidates
|
||||||
|
ORDER BY candidates.email;
|
||||||
|
"""
|
||||||
|
out = _psql(sql)
|
||||||
|
users: list[VaultwardenUser] = []
|
||||||
|
for row in _parse_rows(out):
|
||||||
|
if len(row) < 3:
|
||||||
|
continue
|
||||||
|
uuid, email, dep_raw = row[0].strip(), row[1].strip(), row[2].strip()
|
||||||
|
if not uuid or not email:
|
||||||
|
continue
|
||||||
|
if not _UUID_RE.match(uuid):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
dep = int(dep_raw)
|
||||||
|
except ValueError:
|
||||||
|
dep = 0
|
||||||
|
users.append(VaultwardenUser(uuid=uuid, email=email, dependent_rows=dep))
|
||||||
|
return users
|
||||||
|
|
||||||
|
|
||||||
|
def _list_invitations(prefixes: list[str], protected: set[str]) -> list[str]:
|
||||||
|
clause = _sql_or_email_prefixes(prefixes)
|
||||||
|
protected_clause = ""
|
||||||
|
if protected:
|
||||||
|
protected_clause = f"AND email <> ALL({_sql_text_array(sorted(protected))})"
|
||||||
|
sql = f"SELECT email FROM invitations WHERE ({clause}) {protected_clause} ORDER BY email;"
|
||||||
|
out = _psql(sql)
|
||||||
|
invites: list[str] = []
|
||||||
|
for row in _parse_rows(out):
|
||||||
|
if not row:
|
||||||
|
continue
|
||||||
|
email = row[0].strip()
|
||||||
|
if email:
|
||||||
|
invites.append(email)
|
||||||
|
return invites
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_invitations(emails: list[str]) -> int:
|
||||||
|
if not emails:
|
||||||
|
return 0
|
||||||
|
email_list = ",".join(_sql_quote(e) for e in emails)
|
||||||
|
sql = f"DELETE FROM invitations WHERE email IN ({email_list});"
|
||||||
|
out = _psql(sql)
|
||||||
|
match = re.search(r"DELETE\s+(\d+)", out)
|
||||||
|
return int(match.group(1)) if match else 0
|
||||||
|
|
||||||
|
|
||||||
|
def _delete_users(uuids: list[str]) -> int:
|
||||||
|
if not uuids:
|
||||||
|
return 0
|
||||||
|
uuid_list = ",".join(_sql_quote(u) for u in uuids)
|
||||||
|
sql = f"DELETE FROM users WHERE uuid IN ({uuid_list});"
|
||||||
|
out = _psql(sql)
|
||||||
|
match = re.search(r"DELETE\s+(\d+)", out)
|
||||||
|
return int(match.group(1)) if match else 0
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_args(argv: list[str]) -> argparse.Namespace:
|
||||||
|
parser = argparse.ArgumentParser(
|
||||||
|
prog="test_vaultwarden_user_cleanup",
|
||||||
|
description="Manual-only cleanup for Vaultwarden test users/invites (DB-level).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--prefix",
|
||||||
|
action="append",
|
||||||
|
required=True,
|
||||||
|
help="Email prefix to target (repeatable). Example: --prefix e2e-",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--apply",
|
||||||
|
action="store_true",
|
||||||
|
help="Apply deletions (default is dry-run). Requires --confirm.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--confirm",
|
||||||
|
default="",
|
||||||
|
help="Required when using --apply. Must exactly equal the comma-separated prefix list.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--protect-email",
|
||||||
|
action="append",
|
||||||
|
default=[],
|
||||||
|
help="Vaultwarden emails that must never be deleted (repeatable).",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--verbose",
|
||||||
|
action="store_true",
|
||||||
|
help="List matched emails (and invitation emails).",
|
||||||
|
)
|
||||||
|
return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv: list[str]) -> int:
|
||||||
|
args = _parse_args(argv)
|
||||||
|
prefixes = _validate_prefixes(args.prefix)
|
||||||
|
expected_confirm = ",".join(prefixes)
|
||||||
|
|
||||||
|
protected = {e.strip() for e in args.protect_email if e.strip()}
|
||||||
|
protected |= {
|
||||||
|
"brad@bstein.dev",
|
||||||
|
"edstein87@outlook.com",
|
||||||
|
"indifox8@gmail.com",
|
||||||
|
"mgs.stein@gmail.com",
|
||||||
|
"patriot87@gmail.com",
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.apply and args.confirm != expected_confirm:
|
||||||
|
print(
|
||||||
|
f"error: refusing to apply without --confirm '{expected_confirm}' (got '{args.confirm}')",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
users = _list_users(prefixes, protected=protected)
|
||||||
|
invites = _list_invitations(prefixes, protected=protected)
|
||||||
|
|
||||||
|
print(f"prefixes: {expected_confirm}")
|
||||||
|
print(f"mode: {'APPLY' if args.apply else 'DRY-RUN'}")
|
||||||
|
if protected:
|
||||||
|
print(f"protected emails: {', '.join(sorted(protected))}")
|
||||||
|
print(f"vaultwarden users matched: {len(users)}")
|
||||||
|
print(f"vaultwarden invitations matched: {len(invites)}")
|
||||||
|
|
||||||
|
if args.verbose:
|
||||||
|
for user in users[: min(100, len(users))]:
|
||||||
|
print(f" user: {user.email} (deps={user.dependent_rows})")
|
||||||
|
if len(users) > 100:
|
||||||
|
print(f" ... and {len(users) - 100} more users")
|
||||||
|
for email in invites[: min(100, len(invites))]:
|
||||||
|
print(f" invite: {email}")
|
||||||
|
if len(invites) > 100:
|
||||||
|
print(f" ... and {len(invites) - 100} more invitations")
|
||||||
|
|
||||||
|
unsafe = [u for u in users if u.dependent_rows > 0]
|
||||||
|
if unsafe:
|
||||||
|
print("refusing to delete users with dependent data:", file=sys.stderr)
|
||||||
|
for user in unsafe[: min(50, len(unsafe))]:
|
||||||
|
print(f" - {user.email} deps={user.dependent_rows}", file=sys.stderr)
|
||||||
|
if len(unsafe) > 50:
|
||||||
|
print(f" ... and {len(unsafe) - 50} more", file=sys.stderr)
|
||||||
|
return 2
|
||||||
|
|
||||||
|
if not args.apply:
|
||||||
|
print("dry-run complete (no changes made)")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
deleted_invites = _delete_invitations(invites)
|
||||||
|
deleted_users = _delete_users([u.uuid for u in users])
|
||||||
|
print(f"deleted vaultwarden invitations: {deleted_invites}")
|
||||||
|
print(f"deleted vaultwarden users: {deleted_users}")
|
||||||
|
print("done")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main(sys.argv[1:]))
|
||||||
15
scripts/test_vaultwarden_user_cleanup.sh
Executable file
15
scripts/test_vaultwarden_user_cleanup.sh
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Manual-only helper to clean Vaultwarden test users and invites from Postgres.
|
||||||
|
#
|
||||||
|
# Usage (dry-run):
|
||||||
|
# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e-
|
||||||
|
#
|
||||||
|
# Usage (apply):
|
||||||
|
# scripts/test_vaultwarden_user_cleanup.sh --prefix e2e- --apply --confirm e2e-
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
|
||||||
|
|
||||||
|
python3 "${SCRIPT_DIR}/test_vaultwarden_user_cleanup.py" "$@"
|
||||||
|
|
||||||
@ -20,7 +20,13 @@ def load_sync_module(monkeypatch):
|
|||||||
}
|
}
|
||||||
for k, v in env.items():
|
for k, v in env.items():
|
||||||
monkeypatch.setenv(k, v)
|
monkeypatch.setenv(k, v)
|
||||||
module_path = pathlib.Path(__file__).resolve().parents[1] / "mailu_sync.py"
|
module_path = (
|
||||||
|
pathlib.Path(__file__).resolve().parents[2]
|
||||||
|
/ "services"
|
||||||
|
/ "mailu"
|
||||||
|
/ "scripts"
|
||||||
|
/ "mailu_sync.py"
|
||||||
|
)
|
||||||
spec = importlib.util.spec_from_file_location("mailu_sync_testmod", module_path)
|
spec = importlib.util.spec_from_file_location("mailu_sync_testmod", module_path)
|
||||||
module = importlib.util.module_from_spec(spec)
|
module = importlib.util.module_from_spec(spec)
|
||||||
assert spec.loader is not None
|
assert spec.loader is not None
|
||||||
@ -102,7 +108,8 @@ def test_kc_get_users_paginates(monkeypatch):
|
|||||||
sync.SESSION = _PagedSession()
|
sync.SESSION = _PagedSession()
|
||||||
users = sync.kc_get_users("tok")
|
users = sync.kc_get_users("tok")
|
||||||
assert [u["id"] for u in users] == ["u1", "u2"]
|
assert [u["id"] for u in users] == ["u1", "u2"]
|
||||||
assert sync.SESSION.calls == 2
|
# Pagination stops when results < page size.
|
||||||
|
assert sync.SESSION.calls == 1
|
||||||
|
|
||||||
|
|
||||||
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
||||||
@ -119,6 +126,7 @@ def test_ensure_mailu_user_skips_foreign_domain(monkeypatch):
|
|||||||
|
|
||||||
def test_ensure_mailu_user_upserts(monkeypatch):
|
def test_ensure_mailu_user_upserts(monkeypatch):
|
||||||
sync = load_sync_module(monkeypatch)
|
sync = load_sync_module(monkeypatch)
|
||||||
|
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
|
||||||
captured = {}
|
captured = {}
|
||||||
|
|
||||||
class _Cursor:
|
class _Cursor:
|
||||||
@ -134,6 +142,7 @@ def test_ensure_mailu_user_upserts(monkeypatch):
|
|||||||
|
|
||||||
def test_main_generates_password_and_upserts(monkeypatch):
|
def test_main_generates_password_and_upserts(monkeypatch):
|
||||||
sync = load_sync_module(monkeypatch)
|
sync = load_sync_module(monkeypatch)
|
||||||
|
monkeypatch.setattr(sync.bcrypt_sha256, "hash", lambda password: f"hash:{password}")
|
||||||
users = [
|
users = [
|
||||||
{"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}},
|
{"id": "u1", "username": "user1", "email": "user1@example.com", "attributes": {}},
|
||||||
{"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}},
|
{"id": "u2", "username": "user2", "email": "user2@example.com", "attributes": {"mailu_app_password": ["keepme"]}},
|
||||||
@ -176,6 +185,6 @@ def test_main_generates_password_and_upserts(monkeypatch):
|
|||||||
|
|
||||||
sync.main()
|
sync.main()
|
||||||
|
|
||||||
# Should attempt two inserts (third user skipped due to domain mismatch)
|
# Always backfill mailu_email, even if Keycloak recovery email is external.
|
||||||
assert len(updated) == 1 # only one missing attr was backfilled
|
assert len(updated) == 3
|
||||||
assert conns and len(conns[0]._cursor.executions) == 2
|
assert conns and len(conns[0]._cursor.executions) == 3
|
||||||
|
|||||||
105
services/ai-llm/deployment.yaml
Normal file
105
services/ai-llm/deployment.yaml
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
# services/ai-llm/deployment.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: ollama
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 2
|
||||||
|
strategy:
|
||||||
|
type: RollingUpdate
|
||||||
|
rollingUpdate:
|
||||||
|
maxSurge: 0
|
||||||
|
maxUnavailable: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: ollama
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: ollama
|
||||||
|
annotations:
|
||||||
|
ai.bstein.dev/model: qwen2.5-coder:7b-instruct-q4_0
|
||||||
|
ai.bstein.dev/gpu: GPU pool (titan-20/21/22/24)
|
||||||
|
spec:
|
||||||
|
affinity:
|
||||||
|
nodeAffinity:
|
||||||
|
requiredDuringSchedulingIgnoredDuringExecution:
|
||||||
|
nodeSelectorTerms:
|
||||||
|
- matchExpressions:
|
||||||
|
- key: kubernetes.io/hostname
|
||||||
|
operator: In
|
||||||
|
values:
|
||||||
|
- titan-20
|
||||||
|
- titan-21
|
||||||
|
- titan-22
|
||||||
|
- titan-24
|
||||||
|
runtimeClassName: nvidia
|
||||||
|
volumes:
|
||||||
|
- name: models
|
||||||
|
persistentVolumeClaim:
|
||||||
|
claimName: ollama-models
|
||||||
|
initContainers:
|
||||||
|
- name: warm-model
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
env:
|
||||||
|
- name: OLLAMA_HOST
|
||||||
|
value: 0.0.0.0
|
||||||
|
- name: NVIDIA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||||
|
value: compute,utility
|
||||||
|
- name: OLLAMA_MODELS
|
||||||
|
value: /root/.ollama
|
||||||
|
- name: OLLAMA_MODEL
|
||||||
|
value: qwen2.5-coder:7b-instruct-q4_0
|
||||||
|
command:
|
||||||
|
- /bin/sh
|
||||||
|
- -c
|
||||||
|
- |
|
||||||
|
set -e
|
||||||
|
ollama serve >/tmp/ollama.log 2>&1 &
|
||||||
|
sleep 6
|
||||||
|
ollama pull "${OLLAMA_MODEL}"
|
||||||
|
pkill ollama || true
|
||||||
|
volumeMounts:
|
||||||
|
- name: models
|
||||||
|
mountPath: /root/.ollama
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 1Gi
|
||||||
|
nvidia.com/gpu.shared: 1
|
||||||
|
limits:
|
||||||
|
nvidia.com/gpu.shared: 1
|
||||||
|
containers:
|
||||||
|
- name: ollama
|
||||||
|
image: ollama/ollama:latest
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 11434
|
||||||
|
env:
|
||||||
|
- name: OLLAMA_HOST
|
||||||
|
value: 0.0.0.0
|
||||||
|
- name: OLLAMA_KEEP_ALIVE
|
||||||
|
value: 6h
|
||||||
|
- name: OLLAMA_MODELS
|
||||||
|
value: /root/.ollama
|
||||||
|
- name: NVIDIA_VISIBLE_DEVICES
|
||||||
|
value: all
|
||||||
|
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||||
|
value: compute,utility
|
||||||
|
volumeMounts:
|
||||||
|
- name: models
|
||||||
|
mountPath: /root/.ollama
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: "2"
|
||||||
|
memory: 8Gi
|
||||||
|
nvidia.com/gpu.shared: 1
|
||||||
|
limits:
|
||||||
|
cpu: "4"
|
||||||
|
memory: 12Gi
|
||||||
|
nvidia.com/gpu.shared: 1
|
||||||
9
services/ai-llm/kustomization.yaml
Normal file
9
services/ai-llm/kustomization.yaml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# services/ai-llm/kustomization.yaml
|
||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
namespace: ai
|
||||||
|
resources:
|
||||||
|
- namespace.yaml
|
||||||
|
- pvc.yaml
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
5
services/ai-llm/namespace.yaml
Normal file
5
services/ai-llm/namespace.yaml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# services/ai-llm/namespace.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: ai
|
||||||
13
services/ai-llm/pvc.yaml
Normal file
13
services/ai-llm/pvc.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# services/ai-llm/pvc.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: PersistentVolumeClaim
|
||||||
|
metadata:
|
||||||
|
name: ollama-models
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
accessModes:
|
||||||
|
- ReadWriteOnce
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
storage: 30Gi
|
||||||
|
storageClassName: astreae
|
||||||
14
services/ai-llm/service.yaml
Normal file
14
services/ai-llm/service.yaml
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
# services/ai-llm/service.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: ollama
|
||||||
|
namespace: ai
|
||||||
|
spec:
|
||||||
|
type: ClusterIP
|
||||||
|
selector:
|
||||||
|
app: ollama
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 11434
|
||||||
|
targetPort: 11434
|
||||||
@ -5,7 +5,7 @@ metadata:
|
|||||||
name: bstein-dev-home-backend
|
name: bstein-dev-home-backend
|
||||||
namespace: bstein-dev-home
|
namespace: bstein-dev-home
|
||||||
spec:
|
spec:
|
||||||
replicas: 2
|
replicas: 1
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -15,6 +15,8 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
app: bstein-dev-home-backend
|
app: bstein-dev-home-backend
|
||||||
spec:
|
spec:
|
||||||
|
automountServiceAccountToken: true
|
||||||
|
serviceAccountName: bstein-dev-home
|
||||||
nodeSelector:
|
nodeSelector:
|
||||||
kubernetes.io/arch: arm64
|
kubernetes.io/arch: arm64
|
||||||
node-role.kubernetes.io/worker: "true"
|
node-role.kubernetes.io/worker: "true"
|
||||||
@ -22,8 +24,73 @@ spec:
|
|||||||
- name: harbor-bstein-robot
|
- name: harbor-bstein-robot
|
||||||
containers:
|
containers:
|
||||||
- name: backend
|
- name: backend
|
||||||
image: registry.bstein.dev/bstein/bstein-dev-home-backend:latest
|
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
|
command: ["gunicorn"]
|
||||||
|
args:
|
||||||
|
- -b
|
||||||
|
- 0.0.0.0:8080
|
||||||
|
- --workers
|
||||||
|
- "2"
|
||||||
|
- --timeout
|
||||||
|
- "180"
|
||||||
|
- app:app
|
||||||
|
env:
|
||||||
|
- name: AI_CHAT_API
|
||||||
|
value: http://ollama.ai.svc.cluster.local:11434
|
||||||
|
- name: AI_CHAT_MODEL
|
||||||
|
value: qwen2.5-coder:7b-instruct-q4_0
|
||||||
|
- name: AI_CHAT_TIMEOUT_SEC
|
||||||
|
value: "60"
|
||||||
|
- name: AI_NODE_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
- name: AI_NODE_GPU_MAP
|
||||||
|
value: |
|
||||||
|
{"titan-20": "Jetson Xavier (edge GPU)", "titan-21": "Jetson Xavier (edge GPU)", "titan-22": "RTX 3050 8GB (local GPU)", "titan-24": "RTX 3080 8GB (local GPU)"}
|
||||||
|
- name: KEYCLOAK_ENABLED
|
||||||
|
value: "true"
|
||||||
|
- name: KEYCLOAK_URL
|
||||||
|
value: https://sso.bstein.dev
|
||||||
|
- name: KEYCLOAK_REALM
|
||||||
|
value: atlas
|
||||||
|
- name: KEYCLOAK_CLIENT_ID
|
||||||
|
value: bstein-dev-home
|
||||||
|
- name: KEYCLOAK_ISSUER
|
||||||
|
value: https://sso.bstein.dev/realms/atlas
|
||||||
|
- name: KEYCLOAK_JWKS_URL
|
||||||
|
value: http://keycloak.sso.svc.cluster.local/realms/atlas/protocol/openid-connect/certs
|
||||||
|
- name: KEYCLOAK_ADMIN_URL
|
||||||
|
value: http://keycloak.sso.svc.cluster.local
|
||||||
|
- name: KEYCLOAK_ADMIN_REALM
|
||||||
|
value: atlas
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_ID
|
||||||
|
value: bstein-dev-home-admin
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: bstein-dev-home-keycloak-admin
|
||||||
|
key: client_secret
|
||||||
|
- name: ACCOUNT_ALLOWED_GROUPS
|
||||||
|
value: ""
|
||||||
|
- name: PORTAL_DATABASE_URL
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: atlas-portal-db
|
||||||
|
key: PORTAL_DATABASE_URL
|
||||||
|
- name: HTTP_CHECK_TIMEOUT_SEC
|
||||||
|
value: "2"
|
||||||
|
- name: ACCESS_REQUEST_SUBMIT_RATE_LIMIT
|
||||||
|
value: "30"
|
||||||
|
- name: ACCESS_REQUEST_SUBMIT_RATE_WINDOW_SEC
|
||||||
|
value: "3600"
|
||||||
|
- name: ACCESS_REQUEST_STATUS_RATE_LIMIT
|
||||||
|
value: "120"
|
||||||
|
- name: ACCESS_REQUEST_STATUS_RATE_WINDOW_SEC
|
||||||
|
value: "60"
|
||||||
|
- name: ACCESS_REQUEST_INTERNAL_EMAIL_ALLOWLIST
|
||||||
|
value: robotuser@bstein.dev
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
containerPort: 8080
|
containerPort: 8080
|
||||||
@ -33,16 +100,18 @@ spec:
|
|||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 2
|
initialDelaySeconds: 2
|
||||||
periodSeconds: 5
|
periodSeconds: 5
|
||||||
|
timeoutSeconds: 3
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /api/healthz
|
path: /api/healthz
|
||||||
port: http
|
port: http
|
||||||
initialDelaySeconds: 10
|
initialDelaySeconds: 10
|
||||||
periodSeconds: 10
|
periodSeconds: 10
|
||||||
|
timeoutSeconds: 3
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 50m
|
cpu: 100m
|
||||||
memory: 64Mi
|
memory: 128Mi
|
||||||
limits:
|
limits:
|
||||||
cpu: 300m
|
cpu: 500m
|
||||||
memory: 256Mi
|
memory: 512Mi
|
||||||
|
|||||||
69
services/bstein-dev-home/chat-ai-gateway-deployment.yaml
Normal file
69
services/bstein-dev-home/chat-ai-gateway-deployment.yaml
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
# services/bstein-dev-home/chat-ai-gateway-deployment.yaml
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: chat-ai-gateway
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
revisionHistoryLimit: 2
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: chat-ai-gateway
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app: chat-ai-gateway
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/arch: arm64
|
||||||
|
node-role.kubernetes.io/worker: "true"
|
||||||
|
containers:
|
||||||
|
- name: gateway
|
||||||
|
image: python:3.11-slim
|
||||||
|
command: ["/bin/sh","-c"]
|
||||||
|
args:
|
||||||
|
- python /app/gateway.py
|
||||||
|
env:
|
||||||
|
- name: UPSTREAM_URL
|
||||||
|
value: http://bstein-dev-home-backend/api/chat
|
||||||
|
- name: CHAT_KEY_MATRIX
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: chat-ai-keys-runtime
|
||||||
|
key: matrix
|
||||||
|
- name: CHAT_KEY_HOMEPAGE
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: chat-ai-keys-runtime
|
||||||
|
key: homepage
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
containerPort: 8080
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 2
|
||||||
|
periodSeconds: 5
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /healthz
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 20m
|
||||||
|
memory: 64Mi
|
||||||
|
limits:
|
||||||
|
cpu: 200m
|
||||||
|
memory: 256Mi
|
||||||
|
volumeMounts:
|
||||||
|
- name: code
|
||||||
|
mountPath: /app/gateway.py
|
||||||
|
subPath: gateway.py
|
||||||
|
volumes:
|
||||||
|
- name: code
|
||||||
|
configMap:
|
||||||
|
name: chat-ai-gateway
|
||||||
13
services/bstein-dev-home/chat-ai-gateway-service.yaml
Normal file
13
services/bstein-dev-home/chat-ai-gateway-service.yaml
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# services/bstein-dev-home/chat-ai-gateway-service.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: chat-ai-gateway
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app: chat-ai-gateway
|
||||||
|
ports:
|
||||||
|
- name: http
|
||||||
|
port: 80
|
||||||
|
targetPort: 8080
|
||||||
@ -5,7 +5,7 @@ metadata:
|
|||||||
name: bstein-dev-home-frontend
|
name: bstein-dev-home-frontend
|
||||||
namespace: bstein-dev-home
|
namespace: bstein-dev-home
|
||||||
spec:
|
spec:
|
||||||
replicas: 2
|
replicas: 1
|
||||||
revisionHistoryLimit: 3
|
revisionHistoryLimit: 3
|
||||||
selector:
|
selector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
@ -22,7 +22,7 @@ spec:
|
|||||||
- name: harbor-bstein-robot
|
- name: harbor-bstein-robot
|
||||||
containers:
|
containers:
|
||||||
- name: frontend
|
- name: frontend
|
||||||
image: registry.bstein.dev/bstein/bstein-dev-home-frontend:latest
|
image: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
||||||
imagePullPolicy: Always
|
imagePullPolicy: Always
|
||||||
ports:
|
ports:
|
||||||
- name: http
|
- name: http
|
||||||
|
|||||||
@ -11,7 +11,7 @@ metadata:
|
|||||||
cert-manager.io/cluster-issuer: letsencrypt
|
cert-manager.io/cluster-issuer: letsencrypt
|
||||||
spec:
|
spec:
|
||||||
tls:
|
tls:
|
||||||
- hosts: [ "bstein.dev" ]
|
- hosts: [ "bstein.dev", "chat.ai.bstein.dev" ]
|
||||||
secretName: bstein-dev-home-tls
|
secretName: bstein-dev-home-tls
|
||||||
rules:
|
rules:
|
||||||
- host: bstein.dev
|
- host: bstein.dev
|
||||||
@ -29,3 +29,12 @@ spec:
|
|||||||
service:
|
service:
|
||||||
name: bstein-dev-home-frontend
|
name: bstein-dev-home-frontend
|
||||||
port: { number: 80 }
|
port: { number: 80 }
|
||||||
|
- host: chat.ai.bstein.dev
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: chat-ai-gateway
|
||||||
|
port: { number: 80 }
|
||||||
|
|||||||
@ -5,13 +5,38 @@ namespace: bstein-dev-home
|
|||||||
resources:
|
resources:
|
||||||
- namespace.yaml
|
- namespace.yaml
|
||||||
- image.yaml
|
- image.yaml
|
||||||
|
- rbac.yaml
|
||||||
|
- portal-e2e-client-secret-sync-rbac.yaml
|
||||||
|
- chat-ai-gateway-deployment.yaml
|
||||||
|
- chat-ai-gateway-service.yaml
|
||||||
- frontend-deployment.yaml
|
- frontend-deployment.yaml
|
||||||
- frontend-service.yaml
|
- frontend-service.yaml
|
||||||
- backend-deployment.yaml
|
- backend-deployment.yaml
|
||||||
- backend-service.yaml
|
- backend-service.yaml
|
||||||
|
- vaultwarden-cred-sync-cronjob.yaml
|
||||||
|
- portal-onboarding-e2e-test-job.yaml
|
||||||
- ingress.yaml
|
- ingress.yaml
|
||||||
images:
|
images:
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-frontend
|
||||||
newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
newTag: registry.bstein.dev/bstein/bstein-dev-home-frontend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-frontend"}
|
||||||
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
- name: registry.bstein.dev/bstein/bstein-dev-home-backend
|
||||||
newTag: 0.1.1-0 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
newTag: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
||||||
|
configMapGenerator:
|
||||||
|
- name: chat-ai-gateway
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
files:
|
||||||
|
- gateway.py=scripts/gateway.py
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
- name: vaultwarden-cred-sync-script
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
files:
|
||||||
|
- vaultwarden_cred_sync.py=scripts/vaultwarden_cred_sync.py
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
- name: portal-onboarding-e2e-tests
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
files:
|
||||||
|
- test_portal_onboarding_flow.py=scripts/test_portal_onboarding_flow.py
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
|||||||
@ -0,0 +1,24 @@
|
|||||||
|
# services/bstein-dev-home/portal-e2e-client-secret-sync-rbac.yaml
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: portal-e2e-client-secret-sync-target
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["secrets"]
|
||||||
|
verbs: ["get", "create", "patch", "update"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: portal-e2e-client-secret-sync-target
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: portal-e2e-client-secret-sync
|
||||||
|
namespace: sso
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: portal-e2e-client-secret-sync-target
|
||||||
66
services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml
Normal file
66
services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# services/bstein-dev-home/portal-onboarding-e2e-test-job.yaml
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: Job
|
||||||
|
metadata:
|
||||||
|
name: portal-onboarding-e2e-test-11
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
spec:
|
||||||
|
backoffLimit: 0
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
restartPolicy: Never
|
||||||
|
containers:
|
||||||
|
- name: test
|
||||||
|
image: python:3.11-slim
|
||||||
|
env:
|
||||||
|
- name: PORTAL_BASE_URL
|
||||||
|
value: http://bstein-dev-home-backend.bstein-dev-home.svc.cluster.local
|
||||||
|
- name: KEYCLOAK_ADMIN_URL
|
||||||
|
value: https://sso.bstein.dev
|
||||||
|
- name: KEYCLOAK_REALM
|
||||||
|
value: atlas
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_ID
|
||||||
|
value: bstein-dev-home-admin
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: bstein-dev-home-keycloak-admin
|
||||||
|
key: client_secret
|
||||||
|
- name: PORTAL_E2E_CLIENT_ID
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: portal-e2e-client
|
||||||
|
key: client_id
|
||||||
|
- name: PORTAL_E2E_CLIENT_SECRET
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: portal-e2e-client
|
||||||
|
key: client_secret
|
||||||
|
- name: PORTAL_TARGET_CLIENT_ID
|
||||||
|
value: bstein-dev-home
|
||||||
|
- name: E2E_PORTAL_ADMIN_USERNAME
|
||||||
|
value: bstein
|
||||||
|
- name: E2E_USERNAME_PREFIX
|
||||||
|
value: e2e-portal
|
||||||
|
- name: E2E_CONTACT_EMAIL
|
||||||
|
value: robotuser@bstein.dev
|
||||||
|
- name: E2E_IMAP_KEYCLOAK_USERNAME
|
||||||
|
value: robotuser
|
||||||
|
- name: E2E_DEADLINE_SECONDS
|
||||||
|
value: "600"
|
||||||
|
- name: E2E_POLL_SECONDS
|
||||||
|
value: "10"
|
||||||
|
command: ["/bin/sh", "-c"]
|
||||||
|
args:
|
||||||
|
- |
|
||||||
|
set -euo pipefail
|
||||||
|
python /scripts/test_portal_onboarding_flow.py
|
||||||
|
volumeMounts:
|
||||||
|
- name: tests
|
||||||
|
mountPath: /scripts
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: tests
|
||||||
|
configMap:
|
||||||
|
name: portal-onboarding-e2e-tests
|
||||||
|
defaultMode: 0555
|
||||||
108
services/bstein-dev-home/rbac.yaml
Normal file
108
services/bstein-dev-home/rbac.yaml
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
# services/bstein-dev-home/rbac.yaml
|
||||||
|
apiVersion: v1
|
||||||
|
kind: ServiceAccount
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-ai-reader
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["pods"]
|
||||||
|
verbs: ["get", "list", "watch"]
|
||||||
|
resourceNames: []
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-ai-reader
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: bstein-dev-home-ai-reader
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: bstein-dev-home
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRole
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["secrets"]
|
||||||
|
verbs: ["get"]
|
||||||
|
resourceNames: ["vaultwarden-admin"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: ClusterRoleBinding
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: ClusterRole
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-secret-reader
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: bstein-dev-home
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-token-reader
|
||||||
|
namespace: vaultwarden
|
||||||
|
rules:
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["secrets"]
|
||||||
|
verbs: ["get"]
|
||||||
|
resourceNames: ["vaultwarden-admin"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-token-reader
|
||||||
|
namespace: vaultwarden
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: bstein-dev-home-vaultwarden-admin-token-reader
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: bstein-dev-home
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: Role
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-nextcloud-mail-sync
|
||||||
|
namespace: nextcloud
|
||||||
|
rules:
|
||||||
|
- apiGroups: ["batch"]
|
||||||
|
resources: ["cronjobs"]
|
||||||
|
verbs: ["get"]
|
||||||
|
resourceNames: ["nextcloud-mail-sync"]
|
||||||
|
- apiGroups: ["batch"]
|
||||||
|
resources: ["jobs"]
|
||||||
|
verbs: ["create", "get", "list", "watch"]
|
||||||
|
- apiGroups: [""]
|
||||||
|
resources: ["pods"]
|
||||||
|
verbs: ["get", "list"]
|
||||||
|
---
|
||||||
|
apiVersion: rbac.authorization.k8s.io/v1
|
||||||
|
kind: RoleBinding
|
||||||
|
metadata:
|
||||||
|
name: bstein-dev-home-nextcloud-mail-sync
|
||||||
|
namespace: nextcloud
|
||||||
|
roleRef:
|
||||||
|
apiGroup: rbac.authorization.k8s.io
|
||||||
|
kind: Role
|
||||||
|
name: bstein-dev-home-nextcloud-mail-sync
|
||||||
|
subjects:
|
||||||
|
- kind: ServiceAccount
|
||||||
|
name: bstein-dev-home
|
||||||
|
namespace: bstein-dev-home
|
||||||
70
services/bstein-dev-home/scripts/gateway.py
Normal file
70
services/bstein-dev-home/scripts/gateway.py
Normal file
@ -0,0 +1,70 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||||
|
from urllib import request, error
|
||||||
|
|
||||||
|
UPSTREAM = os.environ.get("UPSTREAM_URL", "http://bstein-dev-home-backend/api/chat")
|
||||||
|
KEY_MATRIX = os.environ.get("CHAT_KEY_MATRIX", "")
|
||||||
|
KEY_HOMEPAGE = os.environ.get("CHAT_KEY_HOMEPAGE", "")
|
||||||
|
|
||||||
|
ALLOWED = {k for k in (KEY_MATRIX, KEY_HOMEPAGE) if k}
|
||||||
|
|
||||||
|
class Handler(BaseHTTPRequestHandler):
|
||||||
|
def _send_json(self, code: int, payload: dict):
|
||||||
|
body = json.dumps(payload).encode()
|
||||||
|
self.send_response(code)
|
||||||
|
self.send_header("Content-Type", "application/json")
|
||||||
|
self.send_header("Content-Length", str(len(body)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(body)
|
||||||
|
|
||||||
|
def do_GET(self): # noqa: N802
|
||||||
|
if self.path in ("/healthz", "/"):
|
||||||
|
return self._send_json(200, {"ok": True})
|
||||||
|
return self._send_json(404, {"error": "not_found"})
|
||||||
|
|
||||||
|
def do_POST(self): # noqa: N802
|
||||||
|
if self.path != "/":
|
||||||
|
return self._send_json(404, {"error": "not_found"})
|
||||||
|
|
||||||
|
key = self.headers.get("x-api-key", "")
|
||||||
|
if not key or key not in ALLOWED:
|
||||||
|
return self._send_json(401, {"error": "unauthorized"})
|
||||||
|
|
||||||
|
length = int(self.headers.get("content-length", "0") or "0")
|
||||||
|
raw = self.rfile.read(length) if length else b"{}"
|
||||||
|
|
||||||
|
try:
|
||||||
|
upstream_req = request.Request(
|
||||||
|
UPSTREAM,
|
||||||
|
data=raw,
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
with request.urlopen(upstream_req, timeout=90) as resp:
|
||||||
|
data = resp.read()
|
||||||
|
self.send_response(resp.status)
|
||||||
|
for k, v in resp.headers.items():
|
||||||
|
if k.lower() in ("content-length", "connection", "server", "date"):
|
||||||
|
continue
|
||||||
|
self.send_header(k, v)
|
||||||
|
self.send_header("Content-Length", str(len(data)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(data)
|
||||||
|
except error.HTTPError as e:
|
||||||
|
data = e.read() if hasattr(e, "read") else b""
|
||||||
|
self.send_response(e.code)
|
||||||
|
self.send_header("Content-Type", "application/json")
|
||||||
|
self.send_header("Content-Length", str(len(data)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(data)
|
||||||
|
except Exception:
|
||||||
|
return self._send_json(502, {"error": "bad_gateway"})
|
||||||
|
|
||||||
|
def main():
|
||||||
|
port = int(os.environ.get("PORT", "8080"))
|
||||||
|
httpd = HTTPServer(("0.0.0.0", port), Handler)
|
||||||
|
httpd.serve_forever()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
428
services/bstein-dev-home/scripts/test_portal_onboarding_flow.py
Normal file
428
services/bstein-dev-home/scripts/test_portal_onboarding_flow.py
Normal file
@ -0,0 +1,428 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import email
|
||||||
|
import http.client
|
||||||
|
import imaplib
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import ssl
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import urllib.error
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
|
||||||
|
|
||||||
|
def _env(name: str, default: str | None = None) -> str:
|
||||||
|
value = os.environ.get(name, default)
|
||||||
|
if value is None or value == "":
|
||||||
|
raise SystemExit(f"missing required env var: {name}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def _post_json(url: str, payload: dict, timeout_s: int = 30) -> dict:
|
||||||
|
body = json.dumps(payload).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
data=body,
|
||||||
|
headers={"Content-Type": "application/json"},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
||||||
|
raw = resp.read().decode()
|
||||||
|
return json.loads(raw) if raw else {}
|
||||||
|
except urllib.error.HTTPError as exc:
|
||||||
|
raw = exc.read().decode(errors="replace")
|
||||||
|
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
||||||
|
|
||||||
|
|
||||||
|
def _post_form(url: str, data: dict[str, str], timeout_s: int = 30) -> dict:
|
||||||
|
body = urllib.parse.urlencode(data).encode()
|
||||||
|
req = urllib.request.Request(
|
||||||
|
url,
|
||||||
|
data=body,
|
||||||
|
headers={"Content-Type": "application/x-www-form-urlencoded"},
|
||||||
|
method="POST",
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
||||||
|
raw = resp.read().decode()
|
||||||
|
return json.loads(raw) if raw else {}
|
||||||
|
except urllib.error.HTTPError as exc:
|
||||||
|
raw = exc.read().decode(errors="replace")
|
||||||
|
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_json(url: str, headers: dict[str, str] | None = None, timeout_s: int = 30) -> object:
|
||||||
|
req = urllib.request.Request(url, headers=headers or {}, method="GET")
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
||||||
|
raw = resp.read().decode()
|
||||||
|
return json.loads(raw) if raw else None
|
||||||
|
except urllib.error.HTTPError as exc:
|
||||||
|
raw = exc.read().decode(errors="replace")
|
||||||
|
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
||||||
|
|
||||||
|
|
||||||
|
def _request_json(
|
||||||
|
method: str,
|
||||||
|
url: str,
|
||||||
|
token: str,
|
||||||
|
payload: dict | None = None,
|
||||||
|
timeout_s: int = 30,
|
||||||
|
) -> dict:
|
||||||
|
data = None
|
||||||
|
headers = {"Authorization": f"Bearer {token}"}
|
||||||
|
if payload is not None:
|
||||||
|
data = json.dumps(payload).encode()
|
||||||
|
headers["Content-Type"] = "application/json"
|
||||||
|
req = urllib.request.Request(url, data=data, headers=headers, method=method)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=timeout_s) as resp:
|
||||||
|
raw = resp.read().decode()
|
||||||
|
return json.loads(raw) if raw else {}
|
||||||
|
except urllib.error.HTTPError as exc:
|
||||||
|
raw = exc.read().decode(errors="replace")
|
||||||
|
raise SystemExit(f"HTTP {exc.code} from {url}: {raw}")
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_client_token(keycloak_base: str, realm: str, client_id: str, client_secret: str) -> str:
|
||||||
|
token_url = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token"
|
||||||
|
payload = _post_form(
|
||||||
|
token_url,
|
||||||
|
{
|
||||||
|
"grant_type": "client_credentials",
|
||||||
|
"client_id": client_id,
|
||||||
|
"client_secret": client_secret,
|
||||||
|
},
|
||||||
|
timeout_s=20,
|
||||||
|
)
|
||||||
|
token = payload.get("access_token")
|
||||||
|
if not isinstance(token, str) or not token:
|
||||||
|
raise SystemExit("keycloak token response missing access_token")
|
||||||
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_token_exchange(
|
||||||
|
*,
|
||||||
|
keycloak_base: str,
|
||||||
|
realm: str,
|
||||||
|
client_id: str,
|
||||||
|
client_secret: str,
|
||||||
|
subject_token: str,
|
||||||
|
requested_subject: str,
|
||||||
|
audience: str,
|
||||||
|
) -> str:
|
||||||
|
token_url = f"{keycloak_base.rstrip('/')}/realms/{realm}/protocol/openid-connect/token"
|
||||||
|
payload = _post_form(
|
||||||
|
token_url,
|
||||||
|
{
|
||||||
|
"grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
|
||||||
|
"client_id": client_id,
|
||||||
|
"client_secret": client_secret,
|
||||||
|
"subject_token": subject_token,
|
||||||
|
"requested_subject": requested_subject,
|
||||||
|
"audience": audience,
|
||||||
|
},
|
||||||
|
timeout_s=20,
|
||||||
|
)
|
||||||
|
token = payload.get("access_token")
|
||||||
|
if not isinstance(token, str) or not token:
|
||||||
|
raise SystemExit("keycloak token exchange response missing access_token")
|
||||||
|
return token
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_find_user(keycloak_base: str, realm: str, token: str, username: str) -> dict | None:
|
||||||
|
url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users?{urllib.parse.urlencode({'username': username, 'exact': 'true', 'max': '1'})}"
|
||||||
|
users = _get_json(url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20)
|
||||||
|
if not isinstance(users, list) or not users:
|
||||||
|
return None
|
||||||
|
user = users[0]
|
||||||
|
return user if isinstance(user, dict) else None
|
||||||
|
|
||||||
|
|
||||||
|
def _keycloak_get_user(keycloak_base: str, realm: str, token: str, user_id: str) -> dict:
|
||||||
|
url = f"{keycloak_base.rstrip('/')}/admin/realms/{realm}/users/{urllib.parse.quote(user_id, safe='')}"
|
||||||
|
data = _get_json(url, headers={"Authorization": f"Bearer {token}"}, timeout_s=20)
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
raise SystemExit("unexpected keycloak user payload")
|
||||||
|
return data
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_attr(attributes: object, key: str) -> str:
|
||||||
|
if not isinstance(attributes, dict):
|
||||||
|
return ""
|
||||||
|
value = attributes.get(key)
|
||||||
|
if isinstance(value, list) and value and isinstance(value[0], str):
|
||||||
|
return value[0]
|
||||||
|
if isinstance(value, str):
|
||||||
|
return value
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _imap_wait_for_verify_token(
|
||||||
|
*,
|
||||||
|
host: str,
|
||||||
|
port: int,
|
||||||
|
username: str,
|
||||||
|
password: str,
|
||||||
|
request_code: str,
|
||||||
|
deadline_sec: int,
|
||||||
|
) -> str:
|
||||||
|
ssl_context = ssl._create_unverified_context()
|
||||||
|
deadline_at = time.monotonic() + deadline_sec
|
||||||
|
|
||||||
|
with imaplib.IMAP4_SSL(host, port, ssl_context=ssl_context) as client:
|
||||||
|
client.login(username, password)
|
||||||
|
client.select("INBOX")
|
||||||
|
|
||||||
|
while time.monotonic() < deadline_at:
|
||||||
|
status, data = client.search(None, "TEXT", request_code)
|
||||||
|
if status == "OK" and data and data[0]:
|
||||||
|
ids = data[0].split()
|
||||||
|
msg_id = ids[-1]
|
||||||
|
fetch_status, msg_data = client.fetch(msg_id, "(RFC822)")
|
||||||
|
if fetch_status != "OK" or not msg_data:
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
|
||||||
|
raw = msg_data[0][1] if isinstance(msg_data[0], tuple) and len(msg_data[0]) > 1 else None
|
||||||
|
if not isinstance(raw, (bytes, bytearray)):
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
|
||||||
|
message = email.message_from_bytes(raw)
|
||||||
|
body = None
|
||||||
|
if message.is_multipart():
|
||||||
|
for part in message.walk():
|
||||||
|
if part.get_content_type() == "text/plain":
|
||||||
|
payload = part.get_payload(decode=True)
|
||||||
|
if isinstance(payload, (bytes, bytearray)):
|
||||||
|
body = payload.decode(errors="replace")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
payload = message.get_payload(decode=True)
|
||||||
|
if isinstance(payload, (bytes, bytearray)):
|
||||||
|
body = payload.decode(errors="replace")
|
||||||
|
|
||||||
|
if not body:
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
|
||||||
|
url = None
|
||||||
|
for line in body.splitlines():
|
||||||
|
candidate = line.strip()
|
||||||
|
if "verify=" in candidate and candidate.startswith("http"):
|
||||||
|
url = candidate
|
||||||
|
break
|
||||||
|
if not url:
|
||||||
|
match = re.search(r"https?://\\S+verify=\\S+", body)
|
||||||
|
url = match.group(0) if match else None
|
||||||
|
if not url:
|
||||||
|
time.sleep(2)
|
||||||
|
continue
|
||||||
|
|
||||||
|
parsed = urllib.parse.urlparse(url)
|
||||||
|
query = urllib.parse.parse_qs(parsed.query)
|
||||||
|
token = query.get("verify", [""])[0]
|
||||||
|
if isinstance(token, str) and token:
|
||||||
|
return token
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
raise SystemExit("verification email not found before deadline")
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
portal_base = _env("PORTAL_BASE_URL").rstrip("/")
|
||||||
|
|
||||||
|
keycloak_base = _env("KEYCLOAK_ADMIN_URL").rstrip("/")
|
||||||
|
realm = _env("KEYCLOAK_REALM", "atlas")
|
||||||
|
kc_admin_client_id = _env("KEYCLOAK_ADMIN_CLIENT_ID")
|
||||||
|
kc_admin_client_secret = _env("KEYCLOAK_ADMIN_CLIENT_SECRET")
|
||||||
|
portal_e2e_client_id = _env("PORTAL_E2E_CLIENT_ID")
|
||||||
|
portal_e2e_client_secret = _env("PORTAL_E2E_CLIENT_SECRET")
|
||||||
|
portal_target_client_id = os.environ.get("PORTAL_TARGET_CLIENT_ID", "bstein-dev-home").strip() or "bstein-dev-home"
|
||||||
|
portal_admin_username = os.environ.get("E2E_PORTAL_ADMIN_USERNAME", "bstein").strip() or "bstein"
|
||||||
|
|
||||||
|
contact_email = os.environ.get("E2E_CONTACT_EMAIL", "robotuser@bstein.dev").strip()
|
||||||
|
if not contact_email:
|
||||||
|
raise SystemExit("E2E_CONTACT_EMAIL must not be empty")
|
||||||
|
|
||||||
|
imap_host = os.environ.get("E2E_IMAP_HOST", "mailu-front.mailu-mailserver.svc.cluster.local").strip()
|
||||||
|
imap_port = int(os.environ.get("E2E_IMAP_PORT", "993"))
|
||||||
|
imap_keycloak_username = os.environ.get("E2E_IMAP_KEYCLOAK_USERNAME", "robotuser").strip()
|
||||||
|
imap_wait_sec = int(os.environ.get("E2E_IMAP_WAIT_SECONDS", "90"))
|
||||||
|
|
||||||
|
try:
|
||||||
|
token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret)
|
||||||
|
except SystemExit as exc:
|
||||||
|
raise SystemExit(f"failed to fetch keycloak token for admin client {kc_admin_client_id!r}: {exc}")
|
||||||
|
mailbox_user = _keycloak_find_user(keycloak_base, realm, token, imap_keycloak_username)
|
||||||
|
if not mailbox_user:
|
||||||
|
raise SystemExit(f"unable to locate Keycloak mailbox user {imap_keycloak_username!r}")
|
||||||
|
mailbox_user_id = mailbox_user.get("id")
|
||||||
|
if not isinstance(mailbox_user_id, str) or not mailbox_user_id:
|
||||||
|
raise SystemExit("mailbox user missing id")
|
||||||
|
|
||||||
|
mailbox_full = _keycloak_get_user(keycloak_base, realm, token, mailbox_user_id)
|
||||||
|
mailbox_attrs = mailbox_full.get("attributes")
|
||||||
|
mailu_email = _extract_attr(mailbox_attrs, "mailu_email")
|
||||||
|
if not mailu_email:
|
||||||
|
mailu_email = contact_email
|
||||||
|
mailu_password = _extract_attr(mailbox_attrs, "mailu_app_password")
|
||||||
|
if not mailu_password:
|
||||||
|
raise SystemExit(f"Keycloak user {imap_keycloak_username!r} missing mailu_app_password attribute")
|
||||||
|
|
||||||
|
username_prefix = os.environ.get("E2E_USERNAME_PREFIX", "e2e-user")
|
||||||
|
now = int(time.time())
|
||||||
|
username = f"{username_prefix}-{now}"
|
||||||
|
|
||||||
|
submit_url = f"{portal_base}/api/access/request"
|
||||||
|
submit_payload = {"username": username, "email": contact_email, "note": "portal onboarding e2e"}
|
||||||
|
submit = None
|
||||||
|
for attempt in range(1, 6):
|
||||||
|
try:
|
||||||
|
submit = _post_json(submit_url, submit_payload, timeout_s=20)
|
||||||
|
break
|
||||||
|
except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc:
|
||||||
|
if attempt == 5:
|
||||||
|
raise SystemExit(f"portal submit failed after {attempt} attempts: {exc}")
|
||||||
|
time.sleep(2)
|
||||||
|
if not isinstance(submit, dict):
|
||||||
|
raise SystemExit("portal submit did not return json")
|
||||||
|
|
||||||
|
request_code = submit.get("request_code")
|
||||||
|
if not isinstance(request_code, str) or not request_code:
|
||||||
|
raise SystemExit(f"request submit did not return request_code: {submit}")
|
||||||
|
|
||||||
|
verify_token = _imap_wait_for_verify_token(
|
||||||
|
host=imap_host,
|
||||||
|
port=imap_port,
|
||||||
|
username=mailu_email,
|
||||||
|
password=mailu_password,
|
||||||
|
request_code=request_code,
|
||||||
|
deadline_sec=imap_wait_sec,
|
||||||
|
)
|
||||||
|
verify_resp = _post_json(
|
||||||
|
f"{portal_base}/api/access/request/verify",
|
||||||
|
{"request_code": request_code, "token": verify_token},
|
||||||
|
timeout_s=30,
|
||||||
|
)
|
||||||
|
if not isinstance(verify_resp, dict) or verify_resp.get("ok") is not True:
|
||||||
|
raise SystemExit(f"unexpected verify response: {verify_resp}")
|
||||||
|
|
||||||
|
portal_admin = _keycloak_find_user(keycloak_base, realm, token, portal_admin_username)
|
||||||
|
if not portal_admin:
|
||||||
|
raise SystemExit(f"unable to locate portal admin user {portal_admin_username!r} via Keycloak admin API")
|
||||||
|
portal_admin_user_id = portal_admin.get("id")
|
||||||
|
if not isinstance(portal_admin_user_id, str) or not portal_admin_user_id:
|
||||||
|
raise SystemExit("portal admin user missing id")
|
||||||
|
|
||||||
|
try:
|
||||||
|
e2e_subject_token = _keycloak_client_token(keycloak_base, realm, portal_e2e_client_id, portal_e2e_client_secret)
|
||||||
|
except SystemExit as exc:
|
||||||
|
raise SystemExit(f"failed to fetch keycloak token for E2E client {portal_e2e_client_id!r}: {exc}")
|
||||||
|
try:
|
||||||
|
portal_bearer = _keycloak_token_exchange(
|
||||||
|
keycloak_base=keycloak_base,
|
||||||
|
realm=realm,
|
||||||
|
client_id=portal_e2e_client_id,
|
||||||
|
client_secret=portal_e2e_client_secret,
|
||||||
|
subject_token=e2e_subject_token,
|
||||||
|
requested_subject=portal_admin_user_id,
|
||||||
|
audience=portal_target_client_id,
|
||||||
|
)
|
||||||
|
except SystemExit as exc:
|
||||||
|
raise SystemExit(f"failed to exchange token for portal approval as {portal_admin_username!r}: {exc}")
|
||||||
|
|
||||||
|
approve_url = f"{portal_base}/api/admin/access/requests/{urllib.parse.quote(username, safe='')}/approve"
|
||||||
|
approve_timeout_s = int(os.environ.get("E2E_APPROVE_TIMEOUT_SECONDS", "180"))
|
||||||
|
approve_attempts = int(os.environ.get("E2E_APPROVE_ATTEMPTS", "3"))
|
||||||
|
approve_resp = None
|
||||||
|
approve_error = None
|
||||||
|
for attempt in range(1, approve_attempts + 1):
|
||||||
|
try:
|
||||||
|
approve_resp = _request_json("POST", approve_url, portal_bearer, payload=None, timeout_s=approve_timeout_s)
|
||||||
|
approve_error = None
|
||||||
|
break
|
||||||
|
except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc:
|
||||||
|
approve_error = str(exc)
|
||||||
|
if attempt == approve_attempts:
|
||||||
|
break
|
||||||
|
time.sleep(3)
|
||||||
|
if approve_resp is None:
|
||||||
|
print(
|
||||||
|
"WARNING: portal approval request did not return a response; "
|
||||||
|
f"continuing to poll status (last_error={approve_error})"
|
||||||
|
)
|
||||||
|
elif not isinstance(approve_resp, dict) or approve_resp.get("ok") is not True:
|
||||||
|
raise SystemExit(f"unexpected approval response: {approve_resp}")
|
||||||
|
|
||||||
|
status_url = f"{portal_base}/api/access/request/status"
|
||||||
|
deadline_s = int(os.environ.get("E2E_DEADLINE_SECONDS", "600"))
|
||||||
|
interval_s = int(os.environ.get("E2E_POLL_SECONDS", "10"))
|
||||||
|
deadline_at = time.monotonic() + deadline_s
|
||||||
|
|
||||||
|
last_status = None
|
||||||
|
last_error = None
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
status_payload = _post_json(status_url, {"request_code": request_code}, timeout_s=60)
|
||||||
|
last_error = None
|
||||||
|
except (http.client.RemoteDisconnected, TimeoutError, urllib.error.URLError) as exc:
|
||||||
|
last_error = str(exc)
|
||||||
|
if time.monotonic() >= deadline_at:
|
||||||
|
raise SystemExit(f"timed out waiting for provisioning to finish (last error={last_error})")
|
||||||
|
time.sleep(interval_s)
|
||||||
|
continue
|
||||||
|
status = status_payload.get("status")
|
||||||
|
if isinstance(status, str):
|
||||||
|
last_status = status
|
||||||
|
|
||||||
|
if status in ("awaiting_onboarding", "ready"):
|
||||||
|
break
|
||||||
|
if status in ("denied", "unknown"):
|
||||||
|
raise SystemExit(f"request transitioned to unexpected terminal status: {status_payload}")
|
||||||
|
if time.monotonic() >= deadline_at:
|
||||||
|
suffix = f" (last error={last_error})" if last_error else ""
|
||||||
|
raise SystemExit(f"timed out waiting for provisioning to finish (last status={last_status}){suffix}")
|
||||||
|
time.sleep(interval_s)
|
||||||
|
|
||||||
|
# Refresh admin token (it may expire during the provisioning wait).
|
||||||
|
token = _keycloak_client_token(keycloak_base, realm, kc_admin_client_id, kc_admin_client_secret)
|
||||||
|
|
||||||
|
user = _keycloak_find_user(keycloak_base, realm, token, username)
|
||||||
|
if not user:
|
||||||
|
raise SystemExit("expected Keycloak user was not created")
|
||||||
|
user_id = user.get("id")
|
||||||
|
if not isinstance(user_id, str) or not user_id:
|
||||||
|
raise SystemExit("created user missing id")
|
||||||
|
|
||||||
|
full = _keycloak_get_user(keycloak_base, realm, token, user_id)
|
||||||
|
required_actions = full.get("requiredActions") or []
|
||||||
|
required: set[str] = set()
|
||||||
|
if isinstance(required_actions, list):
|
||||||
|
required = {a for a in required_actions if isinstance(a, str)}
|
||||||
|
|
||||||
|
unexpected = sorted(required.intersection({"UPDATE_PASSWORD", "VERIFY_EMAIL", "CONFIGURE_TOTP"}))
|
||||||
|
if unexpected:
|
||||||
|
raise SystemExit(
|
||||||
|
"Keycloak user should not require actions at first login "
|
||||||
|
f"(Vaultwarden-first onboarding): unexpected requiredActions={unexpected} full={sorted(required)}"
|
||||||
|
)
|
||||||
|
|
||||||
|
email_verified = full.get("emailVerified")
|
||||||
|
if email_verified is not True:
|
||||||
|
raise SystemExit(f"Keycloak user should have emailVerified=true: emailVerified={email_verified!r}")
|
||||||
|
|
||||||
|
kc_email = full.get("email")
|
||||||
|
if isinstance(kc_email, str) and contact_email and kc_email != contact_email:
|
||||||
|
raise SystemExit(f"Keycloak user email mismatch: expected {contact_email!r} got {kc_email!r}")
|
||||||
|
|
||||||
|
print(f"PASS: onboarding provisioning completed for {request_code} ({username})")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
193
services/bstein-dev-home/scripts/vaultwarden_cred_sync.py
Normal file
193
services/bstein-dev-home/scripts/vaultwarden_cred_sync.py
Normal file
@ -0,0 +1,193 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from typing import Any, Iterable
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from atlas_portal import settings
|
||||||
|
from atlas_portal.keycloak import admin_client
|
||||||
|
from atlas_portal.vaultwarden import invite_user
|
||||||
|
|
||||||
|
|
||||||
|
VAULTWARDEN_EMAIL_ATTR = "vaultwarden_email"
|
||||||
|
VAULTWARDEN_STATUS_ATTR = "vaultwarden_status"
|
||||||
|
VAULTWARDEN_SYNCED_AT_ATTR = "vaultwarden_synced_at"
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_keycloak_users(page_size: int = 200) -> Iterable[dict[str, Any]]:
|
||||||
|
client = admin_client()
|
||||||
|
if not client.ready():
|
||||||
|
raise RuntimeError("keycloak admin client not configured")
|
||||||
|
|
||||||
|
url = f"{settings.KEYCLOAK_ADMIN_URL}/admin/realms/{settings.KEYCLOAK_REALM}/users"
|
||||||
|
first = 0
|
||||||
|
while True:
|
||||||
|
headers = client.headers()
|
||||||
|
# We need attributes for idempotency (vaultwarden_status/vaultwarden_email). Keycloak defaults to a
|
||||||
|
# brief representation which may omit these.
|
||||||
|
params = {"first": str(first), "max": str(page_size), "briefRepresentation": "false"}
|
||||||
|
with httpx.Client(timeout=settings.HTTP_CHECK_TIMEOUT_SEC) as http:
|
||||||
|
resp = http.get(url, params=params, headers=headers)
|
||||||
|
resp.raise_for_status()
|
||||||
|
payload = resp.json()
|
||||||
|
|
||||||
|
if not isinstance(payload, list) or not payload:
|
||||||
|
return
|
||||||
|
|
||||||
|
for item in payload:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
yield item
|
||||||
|
|
||||||
|
if len(payload) < page_size:
|
||||||
|
return
|
||||||
|
first += page_size
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_attr(attrs: Any, key: str) -> str:
|
||||||
|
if not isinstance(attrs, dict):
|
||||||
|
return ""
|
||||||
|
raw = attrs.get(key)
|
||||||
|
if isinstance(raw, list):
|
||||||
|
for item in raw:
|
||||||
|
if isinstance(item, str) and item.strip():
|
||||||
|
return item.strip()
|
||||||
|
return ""
|
||||||
|
if isinstance(raw, str) and raw.strip():
|
||||||
|
return raw.strip()
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _vaultwarden_email_for_user(user: dict[str, Any]) -> str:
|
||||||
|
username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
|
||||||
|
username = username.strip()
|
||||||
|
if not username:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
attrs = user.get("attributes")
|
||||||
|
vaultwarden_email = _extract_attr(attrs, VAULTWARDEN_EMAIL_ATTR)
|
||||||
|
if vaultwarden_email:
|
||||||
|
return vaultwarden_email
|
||||||
|
|
||||||
|
mailu_email = _extract_attr(attrs, "mailu_email")
|
||||||
|
if mailu_email:
|
||||||
|
return mailu_email
|
||||||
|
|
||||||
|
email = (user.get("email") if isinstance(user.get("email"), str) else "") or ""
|
||||||
|
email = email.strip()
|
||||||
|
if email and email.lower().endswith(f"@{settings.MAILU_DOMAIN.lower()}"):
|
||||||
|
return email
|
||||||
|
|
||||||
|
# Don't guess an internal mailbox address until Mailu sync has run and stored mailu_email.
|
||||||
|
# This avoids spamming Vaultwarden invites that can never be delivered (unknown recipient).
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _set_user_attribute_if_missing(username: str, user: dict[str, Any], key: str, value: str) -> None:
|
||||||
|
value = (value or "").strip()
|
||||||
|
if not value:
|
||||||
|
return
|
||||||
|
existing = _extract_attr(user.get("attributes"), key)
|
||||||
|
if existing:
|
||||||
|
return
|
||||||
|
admin_client().set_user_attribute(username, key, value)
|
||||||
|
|
||||||
|
|
||||||
|
def _set_user_attribute(username: str, key: str, value: str) -> None:
|
||||||
|
value = (value or "").strip()
|
||||||
|
if not value:
|
||||||
|
return
|
||||||
|
admin_client().set_user_attribute(username, key, value)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
processed = 0
|
||||||
|
created = 0
|
||||||
|
skipped = 0
|
||||||
|
failures = 0
|
||||||
|
|
||||||
|
for user in _iter_keycloak_users():
|
||||||
|
username = (user.get("username") if isinstance(user.get("username"), str) else "") or ""
|
||||||
|
username = username.strip()
|
||||||
|
if not username:
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
enabled = user.get("enabled")
|
||||||
|
if enabled is False:
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
if user.get("serviceAccountClientId") or username.startswith("service-account-"):
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Fetch the full user payload so we can reliably read attributes (and skip re-invites).
|
||||||
|
user_id = (user.get("id") if isinstance(user.get("id"), str) else "") or ""
|
||||||
|
user_id = user_id.strip()
|
||||||
|
full_user = user
|
||||||
|
if user_id:
|
||||||
|
try:
|
||||||
|
full_user = admin_client().get_user(user_id)
|
||||||
|
except Exception:
|
||||||
|
full_user = user
|
||||||
|
|
||||||
|
current_status = _extract_attr(full_user.get("attributes"), VAULTWARDEN_STATUS_ATTR)
|
||||||
|
current_synced_at = _extract_attr(full_user.get("attributes"), VAULTWARDEN_SYNCED_AT_ATTR)
|
||||||
|
email = _vaultwarden_email_for_user(full_user)
|
||||||
|
if not email:
|
||||||
|
print(f"skip {username}: missing email", file=sys.stderr)
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
_set_user_attribute_if_missing(username, full_user, VAULTWARDEN_EMAIL_ATTR, email)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# If we've already successfully invited or confirmed presence, do not re-invite on every cron run.
|
||||||
|
# Vaultwarden returns 409 for "already exists", which is idempotent but noisy and can trigger rate limits.
|
||||||
|
if current_status in {"invited", "already_present"}:
|
||||||
|
if not current_synced_at:
|
||||||
|
try:
|
||||||
|
_set_user_attribute(
|
||||||
|
username,
|
||||||
|
VAULTWARDEN_SYNCED_AT_ATTR,
|
||||||
|
time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
skipped += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
processed += 1
|
||||||
|
result = invite_user(email)
|
||||||
|
if result.ok:
|
||||||
|
created += 1
|
||||||
|
print(f"ok {username}: {result.status}")
|
||||||
|
try:
|
||||||
|
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
|
||||||
|
_set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
failures += 1
|
||||||
|
print(f"err {username}: {result.status} {result.detail}", file=sys.stderr)
|
||||||
|
try:
|
||||||
|
_set_user_attribute(username, VAULTWARDEN_STATUS_ATTR, result.status)
|
||||||
|
_set_user_attribute(username, VAULTWARDEN_SYNCED_AT_ATTR, time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
print(
|
||||||
|
f"done processed={processed} created_or_present={created} skipped={skipped} failures={failures}",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
return 0 if failures == 0 else 2
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
raise SystemExit(main())
|
||||||
59
services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
Normal file
59
services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
# services/bstein-dev-home/vaultwarden-cred-sync-cronjob.yaml
|
||||||
|
apiVersion: batch/v1
|
||||||
|
kind: CronJob
|
||||||
|
metadata:
|
||||||
|
name: vaultwarden-cred-sync
|
||||||
|
namespace: bstein-dev-home
|
||||||
|
spec:
|
||||||
|
schedule: "*/15 * * * *"
|
||||||
|
concurrencyPolicy: Forbid
|
||||||
|
successfulJobsHistoryLimit: 1
|
||||||
|
failedJobsHistoryLimit: 3
|
||||||
|
jobTemplate:
|
||||||
|
spec:
|
||||||
|
backoffLimit: 0
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
serviceAccountName: bstein-dev-home
|
||||||
|
restartPolicy: Never
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/arch: arm64
|
||||||
|
node-role.kubernetes.io/worker: "true"
|
||||||
|
imagePullSecrets:
|
||||||
|
- name: harbor-bstein-robot
|
||||||
|
containers:
|
||||||
|
- name: sync
|
||||||
|
image: registry.bstein.dev/bstein/bstein-dev-home-backend:0.1.1-92 # {"$imagepolicy": "bstein-dev-home:bstein-dev-home-backend"}
|
||||||
|
imagePullPolicy: Always
|
||||||
|
command:
|
||||||
|
- python
|
||||||
|
- /scripts/vaultwarden_cred_sync.py
|
||||||
|
env:
|
||||||
|
- name: PYTHONPATH
|
||||||
|
value: /app
|
||||||
|
- name: KEYCLOAK_ENABLED
|
||||||
|
value: "true"
|
||||||
|
- name: KEYCLOAK_REALM
|
||||||
|
value: atlas
|
||||||
|
- name: KEYCLOAK_ADMIN_URL
|
||||||
|
value: http://keycloak.sso.svc.cluster.local
|
||||||
|
- name: KEYCLOAK_ADMIN_REALM
|
||||||
|
value: atlas
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_ID
|
||||||
|
value: bstein-dev-home-admin
|
||||||
|
- name: KEYCLOAK_ADMIN_CLIENT_SECRET
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: bstein-dev-home-keycloak-admin
|
||||||
|
key: client_secret
|
||||||
|
- name: HTTP_CHECK_TIMEOUT_SEC
|
||||||
|
value: "20"
|
||||||
|
volumeMounts:
|
||||||
|
- name: vaultwarden-cred-sync-script
|
||||||
|
mountPath: /scripts
|
||||||
|
readOnly: true
|
||||||
|
volumes:
|
||||||
|
- name: vaultwarden-cred-sync-script
|
||||||
|
configMap:
|
||||||
|
name: vaultwarden-cred-sync-script
|
||||||
|
defaultMode: 0555
|
||||||
@ -1,31 +0,0 @@
|
|||||||
# services/ci-demo/deployment.yaml
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: ci-demo
|
|
||||||
namespace: ci-demo
|
|
||||||
spec:
|
|
||||||
replicas: 1
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app.kubernetes.io/name: ci-demo
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app.kubernetes.io/name: ci-demo
|
|
||||||
spec:
|
|
||||||
nodeSelector:
|
|
||||||
hardware: rpi4
|
|
||||||
containers:
|
|
||||||
- name: ci-demo
|
|
||||||
image: registry.bstein.dev/infra/ci-demo:latest
|
|
||||||
ports:
|
|
||||||
- name: http
|
|
||||||
containerPort: 8080
|
|
||||||
readinessProbe:
|
|
||||||
httpGet:
|
|
||||||
path: /
|
|
||||||
port: http
|
|
||||||
initialDelaySeconds: 2
|
|
||||||
periodSeconds: 5
|
|
||||||
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
# services/ci-demo/image.yaml
|
|
||||||
apiVersion: image.toolkit.fluxcd.io/v1
|
|
||||||
kind: ImageRepository
|
|
||||||
metadata:
|
|
||||||
name: ci-demo
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
image: registry.bstein.dev/infra/ci-demo
|
|
||||||
interval: 1m0s
|
|
||||||
---
|
|
||||||
apiVersion: image.toolkit.fluxcd.io/v1
|
|
||||||
kind: ImagePolicy
|
|
||||||
metadata:
|
|
||||||
name: ci-demo
|
|
||||||
namespace: flux-system
|
|
||||||
spec:
|
|
||||||
imageRepositoryRef:
|
|
||||||
name: ci-demo
|
|
||||||
filterTags:
|
|
||||||
pattern: '^v(?P<version>0\.0\.0-\d+)$'
|
|
||||||
extract: '$version'
|
|
||||||
policy:
|
|
||||||
semver:
|
|
||||||
range: ">=0.0.0-0"
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
# services/ci-demo/kustomization.yaml
|
|
||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
resources:
|
|
||||||
- namespace.yaml
|
|
||||||
- image.yaml
|
|
||||||
- deployment.yaml
|
|
||||||
- service.yaml
|
|
||||||
images:
|
|
||||||
- name: registry.bstein.dev/infra/ci-demo
|
|
||||||
newTag: registry.bstein.dev/infra/ci-demo:v0.0.0-3 # {"$imagepolicy": "flux-system:ci-demo"}
|
|
||||||
@ -1,6 +0,0 @@
|
|||||||
# services/ci-demo/namespace.yaml
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: ci-demo
|
|
||||||
|
|
||||||
31
services/comms/NOTES.md
Normal file
31
services/comms/NOTES.md
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
# services/comms/NOTES.md
|
||||||
|
|
||||||
|
Purpose: Matrix + Element + LiveKit stack for Othrys (live.bstein.dev).
|
||||||
|
|
||||||
|
Core flow
|
||||||
|
- Matrix Authentication Service (MAS) handles login/SSO and issues Matrix access tokens.
|
||||||
|
- Synapse is the homeserver; MAS fronts login, Synapse serves client/server APIs.
|
||||||
|
- Element Web provides the main UI; Element Call embeds LiveKit for group video.
|
||||||
|
- LiveKit handles SFU media; Coturn provides TURN for NAT traversal.
|
||||||
|
- matrix-guest-register provisions MAS guest accounts and performs MAS password login to mint device-bound guest tokens (no Keycloak).
|
||||||
|
|
||||||
|
Operational jobs
|
||||||
|
- mas-db-ensure-job: ensures MAS database role/database + secret in comms.
|
||||||
|
- comms-secrets-ensure-job: creates runtime secrets (TURN, LiveKit, Synapse, atlasbot).
|
||||||
|
- synapse-signingkey-ensure-job: ensures Synapse signing key secret.
|
||||||
|
- synapse-seeder-admin-ensure-job: ensures Synapse admin user exists.
|
||||||
|
- synapse-user-seed-job: seeds atlasbot + othrys-seeder users/passwords.
|
||||||
|
- mas-local-users-ensure-job: ensures MAS local users exist (seeder/bot).
|
||||||
|
- seed-othrys-room: (suspended) creates Othrys + joins locals.
|
||||||
|
- reset-othrys-room: suspended CronJob for a manual room reset + pin invite.
|
||||||
|
- pin-othrys-invite: (suspended) pin invite message if missing.
|
||||||
|
- guest-name-randomizer: renames numeric/guest users to adj-noun names.
|
||||||
|
- bstein-force-leave: one-off room leave cleanup.
|
||||||
|
|
||||||
|
Manual re-runs
|
||||||
|
- Unsuspend a CronJob only when needed; re-suspend after completion.
|
||||||
|
|
||||||
|
Ports
|
||||||
|
- Traefik (HTTPS) via LB on 192.168.22.9.
|
||||||
|
- Coturn LB on 192.168.22.5 (3478/5349 + UDP range).
|
||||||
|
- LiveKit LB on 192.168.22.6 (7880/7881/7882/7883).
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user